Example 1 with Layout

use of alluxio.grpc.table.Layout in project presto by prestodb.

the class AlluxioProtoUtils method fromProto.

public static Table fromProto(alluxio.grpc.table.TableInfo table) {
    if (!table.hasLayout()) {
        throw new UnsupportedOperationException("Unsupported table metadata. missing layout.");
    }
    Layout layout = table.getLayout();
    if (!alluxio.table.ProtoUtils.isHiveLayout(layout)) {
        throw new UnsupportedOperationException("Unsupported table layout: " + layout);
    }
    try {
        PartitionInfo partitionInfo = alluxio.table.ProtoUtils.toHiveLayout(layout);
        // compute the data columns
        Set<String> partitionColumns = table.getPartitionColsList().stream().map(FieldSchema::getName).collect(toImmutableSet());
        List<FieldSchema> dataColumns = table.getSchema().getColsList().stream().filter((field) -> !partitionColumns.contains(field.getName())).collect(toImmutableList());
        Table.Builder builder = Table.builder()
                .setDatabaseName(table.getDbName())
                .setTableName(table.getTableName())
                .setOwner(table.getOwner())
                .setTableType(PrestoTableType.OTHER)
                .setDataColumns(dataColumns.stream().map(AlluxioProtoUtils::fromProto).collect(toImmutableList()))
                .setPartitionColumns(table.getPartitionColsList().stream().map(AlluxioProtoUtils::fromProto).collect(toImmutableList()))
                .setParameters(table.getParametersMap())
                .setViewOriginalText(Optional.empty())
                .setViewExpandedText(Optional.empty());
        alluxio.grpc.table.layout.hive.Storage storage = partitionInfo.getStorage();
        // TODO: We should also set storage parameters here when they are available in alluxio.grpc.table.layout.hive.Storage
        builder.getStorageBuilder()
                .setSkewed(storage.getSkewed())
                .setStorageFormat(fromProto(storage.getStorageFormat()))
                .setLocation(storage.getLocation())
                .setBucketProperty(storage.hasBucketProperty() ? fromProto(storage.getBucketProperty()) : Optional.empty())
                .setSerdeParameters(storage.getStorageFormat().getSerdelibParametersMap());
        return builder.build();
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException("Failed to extract PartitionInfo from TableInfo", e);
    }
}
Also used : Date(alluxio.grpc.table.Date) HIVE_COMPATIBLE(com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE) ThriftMetastoreUtil.fromMetastoreNullsCount(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreNullsCount) MetastoreUtil.fromMetastoreDistinctValuesCount(com.facebook.presto.hive.metastore.MetastoreUtil.fromMetastoreDistinctValuesCount) DoubleColumnStatsData(alluxio.grpc.table.DoubleColumnStatsData) HiveColumnStatistics.createDoubleColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) BigDecimal(java.math.BigDecimal) BigInteger(java.math.BigInteger) BinaryColumnStatsData(alluxio.grpc.table.BinaryColumnStatsData) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Layout(alluxio.grpc.table.Layout) Set(java.util.Set) DecimalColumnStatsData(alluxio.grpc.table.DecimalColumnStatsData) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) ROLE(com.facebook.presto.spi.security.PrincipalType.ROLE) PrestoTableType(com.facebook.presto.hive.metastore.PrestoTableType) LocalDate(java.time.LocalDate) HiveColumnStatistics.createBinaryColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) Optional(java.util.Optional) HiveColumnStatistics.createBooleanColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) Table(com.facebook.presto.hive.metastore.Table) HiveColumnStatistics.createIntegerColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) Column(com.facebook.presto.hive.metastore.Column) Database(com.facebook.presto.hive.metastore.Database) DateColumnStatsData(alluxio.grpc.table.DateColumnStatsData) HiveType(com.facebook.presto.hive.HiveType) OptionalDouble(java.util.OptionalDouble) LongColumnStatsData(alluxio.grpc.table.LongColumnStatsData) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Partition(com.facebook.presto.hive.metastore.Partition) OptionalLong(java.util.OptionalLong) FieldSchema(alluxio.grpc.table.FieldSchema) Lists(com.google.common.collect.Lists) ThriftMetastoreUtil.getTotalSizeInBytes(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getTotalSizeInBytes) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) Nullable(javax.annotation.Nullable) ColumnStatisticsData(alluxio.grpc.table.ColumnStatisticsData) USER(com.facebook.presto.spi.security.PrincipalType.USER) InvalidProtocolBufferException(alluxio.shaded.client.com.google.protobuf.InvalidProtocolBufferException) HiveColumnStatistics.createStringColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) BooleanColumnStatsData(alluxio.grpc.table.BooleanColumnStatsData) Decimal(alluxio.grpc.table.Decimal) StringColumnStatsData(alluxio.grpc.table.StringColumnStatsData) HiveBucketProperty(com.facebook.presto.hive.HiveBucketProperty) HiveColumnStatistics.createDecimalColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) 
HiveColumnStatistics.createDateColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) Table(com.facebook.presto.hive.metastore.Table) FieldSchema(alluxio.grpc.table.FieldSchema) InvalidProtocolBufferException(alluxio.shaded.client.com.google.protobuf.InvalidProtocolBufferException) Layout(alluxio.grpc.table.Layout) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo)
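
The two alluxio.table.ProtoUtils calls in this method hide the actual decoding: a Layout is only a typed envelope whose layoutData field carries a serialized PartitionInfo. Below is a minimal sketch of what those helpers amount to, using only the proto accessors shown on this page. decodeHiveLayout is a hypothetical name, and the literal "hive" is an assumption about the hive layout type constant; this is not the real Alluxio implementation.

// Sketch (hypothetical helper, not the actual alluxio.table.ProtoUtils code):
// isHiveLayout(layout) is assumed to compare the layout type against the hive constant, and
// toHiveLayout(layout) to parse the embedded bytes back into a PartitionInfo.
// parseFrom throws the shaded InvalidProtocolBufferException that fromProto above catches.
private static PartitionInfo decodeHiveLayout(Layout layout) throws InvalidProtocolBufferException {
    if (!"hive".equals(layout.getLayoutType())) {
        throw new UnsupportedOperationException("Unsupported table layout: " + layout);
    }
    return PartitionInfo.parseFrom(layout.getLayoutData());
}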

Example 2 with Layout

use of alluxio.grpc.table.Layout in project alluxio by Alluxio.

the class GlueDatabase method getTable.

@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    Table table;
    List<Partition> partitions;
    try {
        GetTableRequest tableRequest = new GetTableRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withName(tableName);
        table = getClient().getTable(tableRequest).getTable();
        partitions = batchGetPartitions(getClient(), tableName);
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);
        List<Column> partitionColumns;
        if (table.getPartitionKeys() == null) {
            partitionColumns = Collections.emptyList();
        } else {
            partitionColumns = table.getPartitionKeys();
        }
        // Get table parameters
        Map<String, String> tableParameters = table.getParameters() == null ? Collections.emptyMap() : table.getParameters();
        // Get column statistics info for table
        List<String> columnNames = table.getStorageDescriptor().getColumns().stream().map(Column::getName).collect(Collectors.toList());
        GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest = new GetColumnStatisticsForTableRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withTableName(tableName)
                .withColumnNames(columnNames);
        List<ColumnStatisticsInfo> columnStatisticsTableData = new ArrayList<>();
        if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
            columnStatisticsTableData = getTableColumnStatistics(mGlueDbName, tableName, getColumnStatisticsForTableRequest);
        }
        // Get column statistics info for partitions
        // potentially expensive call
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
            for (Partition partition : partitions) {
                List<String> partitionValue = partition.getValues();
                if (partitionValue != null) {
                    GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest = new GetColumnStatisticsForPartitionRequest()
                            .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                            .withDatabaseName(mGlueDbName)
                            .withTableName(tableName)
                            .withColumnNames(columnNames)
                            .withPartitionValues(partitionValue);
                    String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                    statsMap.put(partName, getPartitionColumnStatistics(mGlueDbName, tableName, getColumnStatisticsForPartitionRequest));
                }
            }
        }
        PartitionInfo partitionInfo = PartitionInfo.newBuilder()
                .setDbName(mGlueDbName)
                .setTableName(tableName)
                .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                .putAllParameters(tableParameters)
                .build();
        Layout layout = Layout.newBuilder().setLayoutType(HiveLayout.TYPE).setLayoutData(partitionInfo.toByteString()).build();
        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                    .setDbName(mGlueDbName)
                    .setTableName(tableName)
                    .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                    .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                    .setPartitionName(tableName)
                    .putAllParameters(tableParameters);
            udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                        .setDbName(mGlueDbName)
                        .setTableName(tableName)
                        .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
                        .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
                        .setPartitionName(partName)
                        .putAllParameters(partition.getParameters() == null ? Collections.emptyMap() : partition.getParameters());
                if (partition.getValues() != null) {
                    partitionInfoBuilder.addAllValues(partition.getValues());
                }
                udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(), statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new GlueTable(this,
                pathTranslator,
                tableName,
                GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
                columnStatisticsTableData,
                // Get FieldSchema from partition keys
                GlueUtils.toProto(table.getPartitionKeys()),
                udbPartitions,
                layout,
                table);
    } catch (EntityNotFoundException e) {
        throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
    } catch (ValidationException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
    } catch (GlueEncryptionException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
    }
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) ValidationException(com.amazonaws.services.glue.model.ValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) NotFoundException(alluxio.exception.status.NotFoundException) UdbPartition(alluxio.table.common.UdbPartition) Column(com.amazonaws.services.glue.model.Column) GlueEncryptionException(com.amazonaws.services.glue.model.GlueEncryptionException) List(java.util.List) ArrayList(java.util.ArrayList) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) UdbTable(alluxio.table.common.udb.UdbTable) Table(com.amazonaws.services.glue.model.Table) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) IOException(java.io.IOException) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo)
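
The table-level Layout and each GluePartition layout in getTable above are built from the same PartitionInfo shape; only the storage descriptor, partition name, and parameter map vary. A hedged sketch of a helper capturing that shared shape follows. basePartitionInfo is a hypothetical name, not a method of GlueDatabase, and StorageDescriptor refers to the com.amazonaws.services.glue.model type returned by getStorageDescriptor().

// Hypothetical helper capturing the PartitionInfo builder shape repeated in getTable above.
// The table-level Layout uses the same fields minus the partition name; per-partition layouts
// additionally add the partition values before calling build() and toByteString().
// Declared to throw IOException defensively, matching the enclosing method's contract.
private PartitionInfo.Builder basePartitionInfo(String tableName, StorageDescriptor sd,
        String partitionName, Map<String, String> parameters, PathTranslator pathTranslator)
        throws IOException {
    return PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(sd.getColumns()))
            .setStorage(GlueUtils.toProto(sd, pathTranslator))
            .setPartitionName(partitionName)
            .putAllParameters(parameters);
}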

Example 3 with Layout

use of alluxio.grpc.table.Layout in project alluxio by Alluxio.

the class HiveDatabase method getTable.

@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        Table table;
        List<Partition> partitions;
        List<ColumnStatisticsObj> columnStats;
        List<String> partitionColumns;
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        // perform all the hive client operations, and release the client early.
        try (CloseableResource<IMetaStoreClient> client = mClientPool.acquireClientResource()) {
            table = client.get().getTable(mHiveDbName, tableName);
            // Potentially expensive call
            partitions = client.get().listPartitions(mHiveDbName, table.getTableName(), (short) -1);
            List<String> colNames = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
            columnStats = client.get().getTableColumnStatistics(mHiveDbName, tableName, colNames);
            // construct the partition statistics
            List<String> dataColumns = table.getSd().getCols().stream().map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName).collect(Collectors.toList());
            partitionColumns = table.getPartitionKeys().stream().map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName).collect(Collectors.toList());
            List<String> partitionNames = partitions.stream().map(partition -> FileUtils.makePartName(partitionColumns, partition.getValues())).collect(Collectors.toList());
            for (List<String> partialPartitionNames : Lists.partition(partitionNames, MAX_PARTITION_COLUMN_STATISTICS)) {
                statsMap.putAll(client.get()
                        .getPartitionColumnStatistics(mHiveDbName, tableName, partialPartitionNames, dataColumns)
                        .entrySet().stream()
                        .collect(Collectors.toMap(Map.Entry::getKey,
                                e -> e.getValue().stream().map(HiveUtils::toProto).collect(Collectors.toList()),
                                (e1, e2) -> e2)));
            }
        }
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);
        List<ColumnStatisticsInfo> colStats = columnStats.stream().map(HiveUtils::toProto).collect(Collectors.toList());
        // construct table layout
        PartitionInfo partitionInfo = PartitionInfo.newBuilder()
                .setDbName(getUdbContext().getDbName())
                .setTableName(tableName)
                .addAllDataCols(HiveUtils.toProto(table.getSd().getCols()))
                .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator))
                .putAllParameters(table.getParameters())
                .build();
        Layout layout = Layout.newBuilder().setLayoutType(HiveLayout.TYPE).setLayoutData(partitionInfo.toByteString()).build();
        // create udb partitions info
        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            // unpartitioned table, generate a partition
            PartitionInfo.Builder pib = PartitionInfo.newBuilder()
                    .setDbName(getUdbContext().getDbName())
                    .setTableName(tableName)
                    .addAllDataCols(HiveUtils.toProto(table.getSd().getCols()))
                    .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator))
                    .setPartitionName(tableName)
                    .putAllParameters(table.getParameters());
            udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                String partName = FileUtils.makePartName(partitionColumns, partition.getValues());
                PartitionInfo.Builder pib = PartitionInfo.newBuilder()
                        .setDbName(getUdbContext().getDbName())
                        .setTableName(tableName)
                        .addAllDataCols(HiveUtils.toProto(partition.getSd().getCols()))
                        .setStorage(HiveUtils.toProto(partition.getSd(), pathTranslator))
                        .setPartitionName(partName)
                        .putAllParameters(partition.getParameters());
                if (partition.getValues() != null) {
                    pib.addAllValues(partition.getValues());
                }
                udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new HiveTable(tableName, HiveUtils.toProtoSchema(table.getSd().getCols()), colStats, HiveUtils.toProto(table.getPartitionKeys()), udbPartitions, layout, table);
    } catch (NoSuchObjectException e) {
        throw new NotFoundException("Table " + tableName + " does not exist.", e);
    } catch (TException e) {
        throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e);
    }
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) UdbUtils(alluxio.table.common.udb.UdbUtils) UnderDatabase(alluxio.table.common.udb.UnderDatabase) UdbPartition(alluxio.table.common.UdbPartition) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UdbContext(alluxio.table.common.udb.UdbContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) ArrayList(java.util.ArrayList) DatabaseInfo(alluxio.master.table.DatabaseInfo) PathUtils(alluxio.util.io.PathUtils) HiveClientPoolCache(alluxio.table.under.hive.util.HiveClientPoolCache) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) AbstractHiveClientPool(alluxio.table.under.hive.util.AbstractHiveClientPool) AlluxioURI(alluxio.AlluxioURI) UdbBypassSpec(alluxio.table.common.udb.UdbBypassSpec) Map(java.util.Map) UdbConfiguration(alluxio.table.common.udb.UdbConfiguration) Logger(org.slf4j.Logger) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Layout(alluxio.grpc.table.Layout) TException(org.apache.thrift.TException) AlluxioException(alluxio.exception.AlluxioException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) Objects(java.util.Objects) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) PathTranslator(alluxio.table.common.udb.PathTranslator) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbTable(alluxio.table.common.udb.UdbTable) FileUtils(org.apache.hadoop.hive.common.FileUtils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveLayout(alluxio.table.common.layout.HiveLayout) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) TException(org.apache.thrift.TException) HiveLayout(alluxio.table.common.layout.HiveLayout) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) NotFoundException(alluxio.exception.status.NotFoundException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbPartition(alluxio.table.common.UdbPartition) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ArrayList(java.util.ArrayList) List(java.util.List) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) UdbTable(alluxio.table.common.udb.UdbTable) IOException(java.io.IOException) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
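
The partName string does double duty in getTable above: it is the key into statsMap and the partition name stored on each PartitionInfo. FileUtils.makePartName produces Hive's standard slash-separated partition name; a small illustration follows, using java.util.Arrays and made-up "ds"/"country" columns and values.

// Illustration with hypothetical values: FileUtils.makePartName joins partition columns and
// values into the standard Hive partition name, escaping special characters in the values.
// getPartitionColumnStatistics keys its result map by these same names, which is why the
// later statsMap.getOrDefault(partName, ...) lookup lines up with each partition.
List<String> partitionColumns = Arrays.asList("ds", "country");
List<String> partitionValues = Arrays.asList("2020-01-01", "US");
String partName = FileUtils.makePartName(partitionColumns, partitionValues);
// partName: "ds=2020-01-01/country=US"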

Aggregations

Layout (alluxio.grpc.table.Layout) 3
PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo) 3
List (java.util.List) 3
NotFoundException (alluxio.exception.status.NotFoundException) 2
ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo) 2
UdbPartition (alluxio.table.common.UdbPartition) 2
HiveLayout (alluxio.table.common.layout.HiveLayout) 2
PathTranslator (alluxio.table.common.udb.PathTranslator) 2
UdbTable (alluxio.table.common.udb.UdbTable) 2
Lists (com.google.common.collect.Lists) 2
IOException (java.io.IOException) 2
ArrayList (java.util.ArrayList) 2
HashMap (java.util.HashMap) 2
AlluxioURI (alluxio.AlluxioURI) 1
AlluxioException (alluxio.exception.AlluxioException) 1
BinaryColumnStatsData (alluxio.grpc.table.BinaryColumnStatsData) 1
BooleanColumnStatsData (alluxio.grpc.table.BooleanColumnStatsData) 1
ColumnStatisticsData (alluxio.grpc.table.ColumnStatisticsData) 1
Date (alluxio.grpc.table.Date) 1
DateColumnStatsData (alluxio.grpc.table.DateColumnStatsData) 1