Search in sources :

Example 1 with GluePartitionConverter

use of io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter in project trino by trinodb.

the class GlueHiveMetastore method batchGetPartition.

private List<Partition> batchGetPartition(Table table, List<String> partitionNames) {
    try {
        List<PartitionValueList> pendingPartitions = partitionNames.stream().map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName))).collect(toCollection(ArrayList::new));
        ImmutableList.Builder<Partition> resultsBuilder = ImmutableList.builderWithExpectedSize(partitionNames.size());
        // Reuse immutable field instances opportunistically between partitions
        GluePartitionConverter converter = new GluePartitionConverter(table);
        while (!pendingPartitions.isEmpty()) {
            List<Future<BatchGetPartitionResult>> batchGetPartitionFutures = new ArrayList<>();
            for (List<PartitionValueList> partitions : Lists.partition(pendingPartitions, BATCH_GET_PARTITION_MAX_PAGE_SIZE)) {
                long startTimestamp = System.currentTimeMillis();
                batchGetPartitionFutures.add(glueClient.batchGetPartitionAsync(new BatchGetPartitionRequest().withCatalogId(catalogId).withDatabaseName(table.getDatabaseName()).withTableName(table.getTableName()).withPartitionsToGet(partitions), new StatsRecordingAsyncHandler(stats.getGetPartitions(), startTimestamp)));
            }
            pendingPartitions.clear();
            for (Future<BatchGetPartitionResult> future : batchGetPartitionFutures) {
                BatchGetPartitionResult batchGetPartitionResult = future.get();
                List<com.amazonaws.services.glue.model.Partition> partitions = batchGetPartitionResult.getPartitions();
                List<PartitionValueList> unprocessedKeys = batchGetPartitionResult.getUnprocessedKeys();
                // In the unlikely scenario where batchGetPartition call cannot make progress on retrieving partitions, avoid infinite loop
                if (partitions.isEmpty()) {
                    verify(!unprocessedKeys.isEmpty(), "Empty unprocessedKeys for non-empty BatchGetPartitionRequest and empty partitions result");
                    throw new TrinoException(HIVE_METASTORE_ERROR, "Cannot make progress retrieving partitions. Unable to retrieve partitions: " + unprocessedKeys);
                }
                partitions.stream().map(converter).forEach(resultsBuilder::add);
                pendingPartitions.addAll(unprocessedKeys);
            }
        }
        return resultsBuilder.build();
    } catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ThriftMetastoreUtil.updateStatisticsParameters(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters) AWSStaticCredentialsProvider(com.amazonaws.auth.AWSStaticCredentialsProvider) UnaryOperator.identity(java.util.function.UnaryOperator.identity) DefaultAWSCredentialsProviderChain(com.amazonaws.auth.DefaultAWSCredentialsProviderChain) USER(io.trino.spi.security.PrincipalType.USER) RequestMetricCollector(com.amazonaws.metrics.RequestMetricCollector) DeleteTableRequest(com.amazonaws.services.glue.model.DeleteTableRequest) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) Future(java.util.concurrent.Future) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) BatchCreatePartitionRequest(com.amazonaws.services.glue.model.BatchCreatePartitionRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) RequestHandler2(com.amazonaws.handlers.RequestHandler2) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) AmazonServiceException(com.amazonaws.AmazonServiceException) GlueToTrinoConverter.mappedCopy(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.mappedCopy) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DeletePartitionRequest(com.amazonaws.services.glue.model.DeletePartitionRequest) Set(java.util.Set) DatabaseInput(com.amazonaws.services.glue.model.DatabaseInput) TableInput(com.amazonaws.services.glue.model.TableInput) UpdateTableRequest(com.amazonaws.services.glue.model.UpdateTableRequest) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) AWSGlueAsync(com.amazonaws.services.glue.AWSGlueAsync) Partition(io.trino.plugin.hive.metastore.Partition) GetPartitionsRequest(com.amazonaws.services.glue.model.GetPartitionsRequest) Segment(com.amazonaws.services.glue.model.Segment) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) HiveUtil(io.trino.plugin.hive.util.HiveUtil) Iterables(com.google.common.collect.Iterables) GetPartitionResult(com.amazonaws.services.glue.model.GetPartitionResult) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) ColumnNotFoundException(io.trino.spi.connector.ColumnNotFoundException) ArrayList(java.util.ArrayList) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) Comparators.lexicographical(com.google.common.collect.Comparators.lexicographical) Lists(com.google.common.collect.Lists) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) AlreadyExistsException(com.amazonaws.services.glue.model.AlreadyExistsException) AWSCredentialsProvider(com.amazonaws.auth.AWSCredentialsProvider) CreateTableRequest(com.amazonaws.services.glue.model.CreateTableRequest) SchemaAlreadyExistsException(io.trino.plugin.hive.SchemaAlreadyExistsException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) CreateDatabaseRequest(com.amazonaws.services.glue.model.CreateDatabaseRequest) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) Nullable(javax.annotation.Nullable) Executor(java.util.concurrent.Executor) MoreFutures(io.airlift.concurrent.MoreFutures) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) RoleGrant(io.trino.spi.security.RoleGrant) ExecutionException(java.util.concurrent.ExecutionException) ClientConfiguration(com.amazonaws.ClientConfiguration) AwsSdkUtil.getPaginatedResults(io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults) AsyncHandler(com.amazonaws.handlers.AsyncHandler) GetPartitionsResult(com.amazonaws.services.glue.model.GetPartitionsResult) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) GetDatabaseRequest(com.amazonaws.services.glue.model.GetDatabaseRequest) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) CompletionService(java.util.concurrent.CompletionService) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) GetDatabasesRequest(com.amazonaws.services.glue.model.GetDatabasesRequest) Collectors.toMap(java.util.stream.Collectors.toMap) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) Path(org.apache.hadoop.fs.Path) BatchUpdatePartitionResult(com.amazonaws.services.glue.model.BatchUpdatePartitionResult) ImmutableSet(com.google.common.collect.ImmutableSet) AWSGlueAsyncClientBuilder(com.amazonaws.services.glue.AWSGlueAsyncClientBuilder) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) TableAlreadyExistsException(io.trino.plugin.hive.TableAlreadyExistsException) TrinoException(io.trino.spi.TrinoException) STSAssumeRoleSessionCredentialsProvider(com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider) String.format(java.lang.String.format) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) PartitionError(com.amazonaws.services.glue.model.PartitionError) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) UpdateDatabaseRequest(com.amazonaws.services.glue.model.UpdateDatabaseRequest) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) Function(java.util.function.Function) Inject(javax.inject.Inject) EndpointConfiguration(com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration) GetPartitionRequest(com.amazonaws.services.glue.model.GetPartitionRequest) Collectors.toCollection(java.util.stream.Collectors.toCollection) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) Objects.requireNonNull(java.util.Objects.requireNonNull) GlueInputConverter(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter) DeleteDatabaseRequest(com.amazonaws.services.glue.model.DeleteDatabaseRequest) Comparator.comparing(java.util.Comparator.comparing) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata(io.trino.plugin.hive.aws.AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata) BatchGetPartitionRequest(com.amazonaws.services.glue.model.BatchGetPartitionRequest) AmazonWebServiceRequest(com.amazonaws.AmazonWebServiceRequest) BatchCreatePartitionResult(com.amazonaws.services.glue.model.BatchCreatePartitionResult) BasicAWSCredentials(com.amazonaws.auth.BasicAWSCredentials) BatchGetPartitionResult(com.amazonaws.services.glue.model.BatchGetPartitionResult) ErrorDetail(com.amazonaws.services.glue.model.ErrorDetail) TupleDomain(io.trino.spi.predicate.TupleDomain) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) GlueToTrinoConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter) GetDatabaseResult(com.amazonaws.services.glue.model.GetDatabaseResult) GetTablesRequest(com.amazonaws.services.glue.model.GetTablesRequest) MetastoreUtil.verifyCanDropColumn(io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn) UpdatePartitionRequest(com.amazonaws.services.glue.model.UpdatePartitionRequest) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) Comparator(java.util.Comparator) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) BatchGetPartitionRequest(com.amazonaws.services.glue.model.BatchGetPartitionRequest) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) BatchGetPartitionResult(com.amazonaws.services.glue.model.BatchGetPartitionResult) GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with GluePartitionConverter

use of io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter in project trino by trinodb.

the class GlueHiveMetastore method getPartitions.

private List<Partition> getPartitions(Table table, String expression, @Nullable Segment segment) {
    try {
        // Reuse immutable field instances opportunistically between partitions
        GluePartitionConverter converter = new GluePartitionConverter(table);
        List<Partition> partitions = getPaginatedResults(glueClient::getPartitions, new GetPartitionsRequest().withCatalogId(catalogId).withDatabaseName(table.getDatabaseName()).withTableName(table.getTableName()).withExpression(expression).withSegment(segment).withMaxResults(AWS_GLUE_GET_PARTITIONS_MAX_RESULTS), GetPartitionsRequest::setNextToken, GetPartitionsResult::getNextToken, stats.getGetPartitions()).map(GetPartitionsResult::getPartitions).flatMap(List::stream).map(converter).collect(toImmutableList());
        return partitions;
    } catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) GetPartitionsResult(com.amazonaws.services.glue.model.GetPartitionsResult) AmazonServiceException(com.amazonaws.AmazonServiceException) TrinoException(io.trino.spi.TrinoException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) GetPartitionsRequest(com.amazonaws.services.glue.model.GetPartitionsRequest)

Example 3 with GluePartitionConverter

use of io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter in project trino by trinodb.

the class TestGlueToTrinoConverter method testConvertPartition.

@Test
public void testConvertPartition() {
    GluePartitionConverter converter = createPartitionConverter(testTable);
    io.trino.plugin.hive.metastore.Partition trinoPartition = converter.apply(testPartition);
    assertEquals(trinoPartition.getDatabaseName(), testPartition.getDatabaseName());
    assertEquals(trinoPartition.getTableName(), testPartition.getTableName());
    assertColumnList(trinoPartition.getColumns(), testPartition.getStorageDescriptor().getColumns());
    assertEquals(trinoPartition.getValues(), testPartition.getValues());
    assertStorage(trinoPartition.getStorage(), testPartition.getStorageDescriptor());
    assertEquals(trinoPartition.getParameters(), testPartition.getParameters());
}
Also used : GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) Test(org.testng.annotations.Test)

Example 4 with GluePartitionConverter

use of io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter in project trino by trinodb.

the class TestGlueToTrinoConverter method testPartitionConversionMemoization.

@Test
public void testPartitionConversionMemoization() {
    String fakeS3Location = "s3://some-fake-location";
    testPartition.getStorageDescriptor().setLocation(fakeS3Location);
    // Second partition to convert with equal (but not aliased) values
    Partition partitionTwo = getGlueTestPartition("" + testDatabase.getName(), "" + testTable.getName(), new ArrayList<>(testPartition.getValues()));
    // Ensure storage fields match as well
    partitionTwo.getStorageDescriptor().setColumns(new ArrayList<>(testPartition.getStorageDescriptor().getColumns()));
    partitionTwo.getStorageDescriptor().setBucketColumns(new ArrayList<>(testPartition.getStorageDescriptor().getBucketColumns()));
    partitionTwo.getStorageDescriptor().setLocation("" + fakeS3Location);
    partitionTwo.getStorageDescriptor().setInputFormat("" + testPartition.getStorageDescriptor().getInputFormat());
    partitionTwo.getStorageDescriptor().setOutputFormat("" + testPartition.getStorageDescriptor().getOutputFormat());
    partitionTwo.getStorageDescriptor().setParameters(new HashMap<>(testPartition.getStorageDescriptor().getParameters()));
    GluePartitionConverter converter = createPartitionConverter(testTable);
    io.trino.plugin.hive.metastore.Partition trinoPartition = converter.apply(testPartition);
    io.trino.plugin.hive.metastore.Partition trinoPartition2 = converter.apply(partitionTwo);
    assertNotSame(trinoPartition, trinoPartition2);
    assertSame(trinoPartition2.getDatabaseName(), trinoPartition.getDatabaseName());
    assertSame(trinoPartition2.getTableName(), trinoPartition.getTableName());
    assertSame(trinoPartition2.getColumns(), trinoPartition.getColumns());
    assertSame(trinoPartition2.getParameters(), trinoPartition.getParameters());
    assertNotSame(trinoPartition2.getValues(), trinoPartition.getValues());
    Storage storage = trinoPartition.getStorage();
    Storage storage2 = trinoPartition2.getStorage();
    assertSame(storage2.getStorageFormat(), storage.getStorageFormat());
    assertSame(storage2.getBucketProperty(), storage.getBucketProperty());
    assertSame(storage2.getSerdeParameters(), storage.getSerdeParameters());
    assertNotSame(storage2.getLocation(), storage.getLocation());
}
Also used : Partition(com.amazonaws.services.glue.model.Partition) TestingMetastoreObjects.getGlueTestPartition(io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestPartition) Storage(io.trino.plugin.hive.metastore.Storage) GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) Test(org.testng.annotations.Test)

Aggregations

GluePartitionConverter (io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter)4 AmazonServiceException (com.amazonaws.AmazonServiceException)2 GetPartitionsRequest (com.amazonaws.services.glue.model.GetPartitionsRequest)2 GetPartitionsResult (com.amazonaws.services.glue.model.GetPartitionsResult)2 PartitionValueList (com.amazonaws.services.glue.model.PartitionValueList)2 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 Partition (io.trino.plugin.hive.metastore.Partition)2 GlueInputConverter.convertPartition (io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition)2 TrinoException (io.trino.spi.TrinoException)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Test (org.testng.annotations.Test)2 AmazonWebServiceRequest (com.amazonaws.AmazonWebServiceRequest)1 ClientConfiguration (com.amazonaws.ClientConfiguration)1 AWSCredentialsProvider (com.amazonaws.auth.AWSCredentialsProvider)1 AWSStaticCredentialsProvider (com.amazonaws.auth.AWSStaticCredentialsProvider)1 BasicAWSCredentials (com.amazonaws.auth.BasicAWSCredentials)1 DefaultAWSCredentialsProviderChain (com.amazonaws.auth.DefaultAWSCredentialsProviderChain)1 STSAssumeRoleSessionCredentialsProvider (com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider)1