Search in sources :

Example 96 with Table

use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.

The following example is from the class FileHiveMetastore, method replaceTable.

@Override
public synchronized void replaceTable(String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges) {
    // Resolve the existing table; throws if it does not exist.
    Table table = getRequiredTable(databaseName, tableName);
    // BUG FIX: the original compared the *existing* table's names against the lookup
    // arguments, which is always true since the table was just fetched by those names.
    // The intent is to verify that the *replacement* table keeps the same identity.
    if (!newTable.getDatabaseName().equals(databaseName) || !newTable.getTableName().equals(tableName)) {
        throw new TrinoException(HIVE_METASTORE_ERROR, "Replacement table must have same name");
    }
    Path tableMetadataDirectory = getTableMetadataDirectory(table);
    // Overwrite (last argument = true) the on-disk table schema with the new definition.
    writeSchemaFile(TABLE, tableMetadataDirectory, tableCodec, new TableMetadata(currentVersion, newTable), true);
    // Replace existing permissions: drop everything, then re-grant from the supplied privileges.
    deleteTablePrivileges(table);
    for (Entry<String, Collection<HivePrivilegeInfo>> entry : principalPrivileges.getUserPrivileges().asMap().entrySet()) {
        setTablePrivileges(new HivePrincipal(USER, entry.getKey()), table.getDatabaseName(), table.getTableName(), entry.getValue());
    }
    for (Entry<String, Collection<HivePrivilegeInfo>> entry : principalPrivileges.getRolePrivileges().asMap().entrySet()) {
        setTablePrivileges(new HivePrincipal(ROLE, entry.getKey()), table.getDatabaseName(), table.getTableName(), entry.getValue());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) TrinoException(io.trino.spi.TrinoException) Collection(java.util.Collection)

Example 97 with Table

use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.

The following example is from the class FileHiveMetastore, method alterPartition.

@Override
public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) {
    // Resolve the owning table; throws if it does not exist.
    Table owningTable = getRequiredTable(databaseName, tableName);
    Partition updatedPartition = partitionWithStatistics.getPartition();
    // Sanity-check the partition against the table before persisting it.
    verifiedPartition(owningTable, updatedPartition);
    // Overwrite (last argument = true) the partition's metadata file in place.
    Path partitionDirectory = getPartitionMetadataDirectory(owningTable, updatedPartition.getValues());
    writeSchemaFile(PARTITION, partitionDirectory, partitionCodec, new PartitionMetadata(owningTable, partitionWithStatistics), true);
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(io.trino.plugin.hive.metastore.Partition) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable)

Example 98 with Table

use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.

The following example is from the class FileHiveMetastore, method getAllPartitionNames.

/**
 * Lists the names of all valid partitions of the given table.
 *
 * @return {@code Optional.empty()} when the table does not exist, otherwise an
 *         immutable list of partition names (invalid partitions are filtered out)
 */
private synchronized Optional<List<String>> getAllPartitionNames(String databaseName, String tableName) {
    requireNonNull(databaseName, "databaseName is null");
    requireNonNull(tableName, "tableName is null");
    return getTable(databaseName, tableName).map(table -> {
        Path tableMetadataDirectory = getTableMetadataDirectory(table);
        // Walk the partition directory tree, build each name from its value stack,
        // and keep only partitions that pass validation.
        ImmutableList.Builder<String> validNames = ImmutableList.builder();
        for (ArrayDeque<String> partitionValues : listPartitions(tableMetadataDirectory, table.getPartitionColumns())) {
            String partitionName = makePartitionName(table.getPartitionColumns(), ImmutableList.copyOf(partitionValues));
            if (isValidPartition(table, partitionName)) {
                validNames.add(partitionName);
            }
        }
        return validNames.build();
    });
}
Also used : Path(org.apache.hadoop.fs.Path) ThriftMetastoreUtil.updateStatisticsParameters(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) USER(io.trino.spi.security.PrincipalType.USER) FileStatus(org.apache.hadoop.fs.FileStatus) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) DATABASE(io.trino.plugin.hive.metastore.file.FileHiveMetastore.SchemaType.DATABASE) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) EnumSet(java.util.EnumSet) TABLE_COMMENT(io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) GuardedBy(javax.annotation.concurrent.GuardedBy) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) OWNERSHIP(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP) HdfsConfig(io.trino.plugin.hive.HdfsConfig) ByteStreams(com.google.common.io.ByteStreams) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) Partition(io.trino.plugin.hive.metastore.Partition) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) 
HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) ColumnNotFoundException(io.trino.spi.connector.ColumnNotFoundException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) HiveType(io.trino.plugin.hive.HiveType) ThriftMetastoreUtil(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) NodeVersion(io.trino.plugin.hive.NodeVersion) SchemaAlreadyExistsException(io.trino.plugin.hive.SchemaAlreadyExistsException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) LinkedHashSet(java.util.LinkedHashSet) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) IOException(java.io.IOException) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) RoleGrant(io.trino.spi.security.RoleGrant) File(java.io.File) TableType(org.apache.hadoop.hive.metastore.TableType) ArrayDeque(java.util.ArrayDeque) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) 
MATERIALIZED_VIEW(org.apache.hadoop.hive.metastore.TableType.MATERIALIZED_VIEW) Locale(java.util.Locale) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) Path(org.apache.hadoop.fs.Path) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) Collectors.toSet(java.util.stream.Collectors.toSet) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Collection(java.util.Collection) TableAlreadyExistsException(io.trino.plugin.hive.TableAlreadyExistsException) TrinoException(io.trino.spi.TrinoException) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Objects(java.util.Objects) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) PARTITION(io.trino.plugin.hive.metastore.file.FileHiveMetastore.SchemaType.PARTITION) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) FileUtils.unescapePathName(org.apache.hadoop.hive.common.FileUtils.unescapePathName) JsonCodec(io.airlift.json.JsonCodec) VERSION_COMPATIBILITY_CONFIG(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig.VERSION_COMPATIBILITY_CONFIG) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) Function(java.util.function.Function) HashSet(java.util.HashSet) Builder(com.google.common.collect.ImmutableSet.Builder) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) UNSAFE_ASSUME_COMPATIBILITY(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig.VersionCompatibility.UNSAFE_ASSUME_COMPATIBILITY) Objects.requireNonNull(java.util.Objects.requireNonNull) 
VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) VersionCompatibility(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig.VersionCompatibility) OutputStream(java.io.OutputStream) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) TupleDomain(io.trino.spi.predicate.TupleDomain) ROLE(io.trino.spi.security.PrincipalType.ROLE) Collectors.toList(java.util.stream.Collectors.toList) MetastoreUtil.verifyCanDropColumn(io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn) TABLE(io.trino.plugin.hive.metastore.file.FileHiveMetastore.SchemaType.TABLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) ArrayDeque(java.util.ArrayDeque)

Example 99 with Table

use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.

The following example is from the class GlueToTrinoConverter, method convertTable.

/**
 * Converts an AWS Glue table model into Trino's Hive metastore {@code Table}.
 *
 * @throws TrinoException when the Glue table has no StorageDescriptor and is not
 *         an Iceberg or Delta Lake table
 */
public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName) {
    Map<String, String> tableParameters = convertParameters(glueTable.getParameters());
    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(dbName)
            .setTableName(glueTable.getName())
            .setOwner(Optional.ofNullable(glueTable.getOwner()))
            .setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name()))
            .setParameters(tableParameters)
            .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
            .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));
    StorageDescriptor storageDescriptor = glueTable.getStorageDescriptor();
    if (storageDescriptor != null) {
        tableBuilder.setDataColumns(convertColumns(storageDescriptor.getColumns(), storageDescriptor.getSerdeInfo().getSerializationLibrary()));
        if (glueTable.getPartitionKeys() == null) {
            tableBuilder.setPartitionColumns(ImmutableList.of());
        }
        else {
            tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), storageDescriptor.getSerdeInfo().getSerializationLibrary()));
        }
        // No benefit to memoizing here, just reusing the implementation
        new StorageConverter().setStorageBuilder(storageDescriptor, tableBuilder.getStorageBuilder(), tableParameters);
    }
    else if (isIcebergTable(tableParameters) || isDeltaLakeTable(tableParameters)) {
        // Iceberg and Delta Lake tables do not use the StorageDescriptor field, but we need to return a Table so the caller can check that
        // the table is an Iceberg/Delta table and decide whether to redirect or fail.
        tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty())));
        tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET));
    }
    else {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable));
    }
    return tableBuilder.build();
}
Also used : HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) Table(io.trino.plugin.hive.metastore.Table) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) TrinoException(io.trino.spi.TrinoException)

Example 100 with Table

use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.

The following example is from the class DefaultGlueColumnStatisticsProvider, method getTableColumnStatistics.

@Override
public Map<String, HiveColumnStatistics> getTableColumnStatistics(Table table) {
    try {
        // Launch one asynchronous Glue request per page of columns (Glue limits how many
        // column stats one request may fetch), then gather all results afterwards.
        List<CompletableFuture<GetColumnStatisticsForTableResult>> pendingResults = new ArrayList<>();
        for (List<String> columnsPage : Lists.partition(getAllColumns(table), GLUE_COLUMN_READ_STAT_PAGE_SIZE)) {
            pendingResults.add(supplyAsync(() -> {
                GetColumnStatisticsForTableRequest request = new GetColumnStatisticsForTableRequest()
                        .withCatalogId(catalogId)
                        .withDatabaseName(table.getDatabaseName())
                        .withTableName(table.getTableName())
                        .withColumnNames(columnsPage);
                return stats.getGetColumnStatisticsForTable().call(() -> glueClient.getColumnStatisticsForTable(request));
            }, readExecutor));
        }
        // Basic stats (row count) are needed to interpret per-column statistics.
        HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());
        ImmutableMap.Builder<String, HiveColumnStatistics> columnStatistics = ImmutableMap.builder();
        for (CompletableFuture<GetColumnStatisticsForTableResult> pending : pendingResults) {
            GetColumnStatisticsForTableResult pageResult = getFutureValue(pending, TrinoException.class);
            for (ColumnStatistics glueStats : pageResult.getColumnStatisticsList()) {
                columnStatistics.put(glueStats.getColumnName(), fromGlueColumnStatistics(glueStats.getStatisticsData(), basicStatistics.getRowCount()));
            }
        }
        return columnStatistics.buildOrThrow();
    } catch (RuntimeException ex) {
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Also used : ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) GetColumnStatisticsForPartitionResult(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionResult) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) Sets.difference(com.google.common.collect.Sets.difference) GetColumnStatisticsForTableResult(com.amazonaws.services.glue.model.GetColumnStatisticsForTableResult) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) Table(io.trino.plugin.hive.metastore.Table) HIVE_PARTITION_NOT_FOUND(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_NOT_FOUND) GlueStatConverter.fromGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.fromGlueColumnStatistics) ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) List(java.util.List) DeleteColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.DeleteColumnStatisticsForTableRequest) Optional(java.util.Optional) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) UpdateColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.UpdateColumnStatisticsForPartitionRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) AWSGlueAsync(com.amazonaws.services.glue.AWSGlueAsync) Partition(io.trino.plugin.hive.metastore.Partition) Type(io.trino.spi.type.Type) 
HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ThriftMetastoreUtil(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) CompletableFuture.allOf(java.util.concurrent.CompletableFuture.allOf) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) CompletableFuture.supplyAsync(java.util.concurrent.CompletableFuture.supplyAsync) CompletableFuture.runAsync(java.util.concurrent.CompletableFuture.runAsync) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) Executor(java.util.concurrent.Executor) ColumnStatisticsType(com.amazonaws.services.glue.model.ColumnStatisticsType) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) GlueStatConverter.toGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.toGlueColumnStatistics) DeleteColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.DeleteColumnStatisticsForPartitionRequest) UpdateColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.UpdateColumnStatisticsForTableRequest) GlueStatConverter.fromGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.fromGlueColumnStatistics) ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) 
GlueStatConverter.toGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.toGlueColumnStatistics) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) GetColumnStatisticsForTableResult(com.amazonaws.services.glue.model.GetColumnStatisticsForTableResult) ImmutableMap(com.google.common.collect.ImmutableMap) CompletableFuture(java.util.concurrent.CompletableFuture) TrinoException(io.trino.spi.TrinoException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

Table (io.trino.plugin.hive.metastore.Table)123 TrinoException (io.trino.spi.TrinoException)69 SchemaTableName (io.trino.spi.connector.SchemaTableName)64 TableNotFoundException (io.trino.spi.connector.TableNotFoundException)57 Path (org.apache.hadoop.fs.Path)56 Column (io.trino.plugin.hive.metastore.Column)54 Optional (java.util.Optional)54 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)50 Partition (io.trino.plugin.hive.metastore.Partition)49 List (java.util.List)48 Map (java.util.Map)47 ImmutableMap (com.google.common.collect.ImmutableMap)45 PrincipalPrivileges (io.trino.plugin.hive.metastore.PrincipalPrivileges)45 ConnectorSession (io.trino.spi.connector.ConnectorSession)45 ImmutableList (com.google.common.collect.ImmutableList)43 Set (java.util.Set)43 Objects.requireNonNull (java.util.Objects.requireNonNull)40 HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext)39 TupleDomain (io.trino.spi.predicate.TupleDomain)38 Type (io.trino.spi.type.Type)38