Search in sources:

Example 1 with UdbPartition

Use of alluxio.table.common.UdbPartition in project alluxio by Alluxio.

The class AlluxioCatalogTest, method createMockUdbTable.

/**
 * Builds a Mockito-backed {@link UdbTable} that looks like an unpartitioned table.
 *
 * <p>The mock reports the given name and schema, random statistics generated for that schema,
 * no partition columns, and exactly one mocked {@link UdbPartition} whose spec is the table name.
 * Both the table and the partition expose a {@link HiveLayout} built from the default
 * {@code PartitionInfo} instance with no column statistics.
 *
 * @param name the name returned by the table mock and used as the partition spec
 * @param schema the schema returned by the table mock and used to generate statistics
 * @return the stubbed table mock
 */
UdbTable createMockUdbTable(String name, Schema schema) throws IOException {
    // The single partition stands in for the whole (unpartitioned) table.
    HiveLayout partitionLayout = new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList());
    UdbPartition onlyPartition = Mockito.mock(UdbPartition.class);
    when(onlyPartition.getSpec()).thenReturn(name);
    when(onlyPartition.getLayout()).thenReturn(partitionLayout);

    UdbTable mockTable = Mockito.mock(UdbTable.class);
    // Identity and schema.
    when(mockTable.getName()).thenReturn(name);
    when(mockTable.getSchema()).thenReturn(schema);
    when(mockTable.getStatistics()).thenReturn(createRandomStatsForSchema(schema));
    // Partitioning: one partition, no partition columns.
    when(mockTable.getPartitions()).thenReturn(Arrays.asList(onlyPartition));
    when(mockTable.getPartitionCols()).thenReturn(Collections.emptyList());
    // Table-level layout, serialized to its proto form.
    when(mockTable.getLayout()).thenReturn(
        new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList()).toProto());
    return mockTable;
}
Also used : UdbTable(alluxio.table.common.udb.UdbTable) HiveLayout(alluxio.table.common.layout.HiveLayout) UdbPartition(alluxio.table.common.UdbPartition)

Example 2 with UdbPartition

Use of alluxio.table.common.UdbPartition in project alluxio by Alluxio.

The class GlueDatabase, method getTable.

/**
 * Fetches {@code tableName} from the Glue catalog and converts it into a {@link UdbTable}.
 *
 * <p>The table and its partitions are read through the Glue client and handed to
 * {@code mountAlluxioPaths} to obtain a {@link PathTranslator}. Table-level and partition-level
 * column statistics are only requested when the corresponding configuration properties are
 * enabled. A table without partition keys is represented by a single partition named after the
 * table; otherwise one {@link GluePartition} is created per Glue partition.
 *
 * @param tableName the name of the table to load
 * @param bypassSpec forwarded to {@code mountAlluxioPaths}
 * @return the table wrapped as a {@link GlueTable}
 * @throws NotFoundException if Glue reports that the entity does not exist
 * @throws IOException if Glue reports a validation or encryption error
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        GetTableRequest tableRequest = new GetTableRequest()
            .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
            .withDatabaseName(mGlueDbName)
            .withName(tableName);
        Table table = getClient().getTable(tableRequest).getTable();
        List<Partition> partitions = batchGetPartitions(getClient(), tableName);
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

        // The partition keys may be null; normalize once and use the normalized list everywhere
        // below (including the final GlueTable construction) so no helper ever sees null.
        List<Column> rawPartitionKeys = table.getPartitionKeys();
        List<Column> partitionColumns =
            rawPartitionKeys == null ? Collections.emptyList() : rawPartitionKeys;

        // Get table parameters
        Map<String, String> tableParameters =
            table.getParameters() == null ? Collections.emptyMap() : table.getParameters();

        // Get column statistics info for table
        List<String> columnNames = table.getStorageDescriptor().getColumns().stream()
            .map(Column::getName)
            .collect(Collectors.toList());
        List<ColumnStatisticsInfo> columnStatisticsTableData = new ArrayList<>();
        if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
            // Only build the request when it is actually going to be sent.
            GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest =
                new GetColumnStatisticsForTableRequest()
                    .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                    .withDatabaseName(mGlueDbName)
                    .withTableName(tableName)
                    .withColumnNames(columnNames);
            columnStatisticsTableData =
                getTableColumnStatistics(mGlueDbName, tableName, getColumnStatisticsForTableRequest);
        }

        // Get column statistics info for partitions
        // potential expensive call
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
            for (Partition partition : partitions) {
                List<String> partitionValue = partition.getValues();
                if (partitionValue != null) {
                    GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest =
                        new GetColumnStatisticsForPartitionRequest()
                            .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                            .withDatabaseName(mGlueDbName)
                            .withTableName(tableName)
                            .withColumnNames(columnNames)
                            .withPartitionValues(partitionValue);
                    String partName = GlueUtils.makePartitionName(partitionColumns, partitionValue);
                    statsMap.put(partName, getPartitionColumnStatistics(mGlueDbName, tableName,
                        getColumnStatisticsForPartitionRequest));
                }
            }
        }

        // Table-level layout: the table's own columns, storage and parameters.
        PartitionInfo partitionInfo = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
            .putAllParameters(tableParameters)
            .build();
        Layout layout = Layout.newBuilder()
            .setLayoutType(HiveLayout.TYPE)
            .setLayoutData(partitionInfo.toByteString())
            .build();

        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            // Unpartitioned table: expose a single partition named after the table itself.
            PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                .setDbName(mGlueDbName)
                .setTableName(tableName)
                .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                .setPartitionName(tableName)
                .putAllParameters(tableParameters);
            udbPartitions.add(new GluePartition(
                new HiveLayout(partitionInfoBuilder.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                // NOTE(review): the partition name is built before the null check on the values
                // below -- confirm makePartitionName tolerates null values.
                String partName =
                    GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                    .setDbName(mGlueDbName)
                    .setTableName(tableName)
                    .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
                    .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
                    .setPartitionName(partName)
                    .putAllParameters(partition.getParameters() == null
                        ? Collections.emptyMap() : partition.getParameters());
                if (partition.getValues() != null) {
                    partitionInfoBuilder.addAllValues(partition.getValues());
                }
                // Partitions without fetched statistics get an empty statistics list.
                udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(),
                    statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new GlueTable(this, pathTranslator, tableName,
            GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
            columnStatisticsTableData,
            // Get FieldSchema from partition keys (null-safe list normalized above)
            GlueUtils.toProto(partitionColumns), udbPartitions, layout, table);
    } catch (EntityNotFoundException e) {
        throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
    } catch (ValidationException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
    } catch (GlueEncryptionException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
    }
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) ValidationException(com.amazonaws.services.glue.model.ValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) NotFoundException(alluxio.exception.status.NotFoundException) UdbPartition(alluxio.table.common.UdbPartition) Column(com.amazonaws.services.glue.model.Column) GlueEncryptionException(com.amazonaws.services.glue.model.GlueEncryptionException) List(java.util.List) ArrayList(java.util.ArrayList) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) UdbTable(alluxio.table.common.udb.UdbTable) Table(com.amazonaws.services.glue.model.Table) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) IOException(java.io.IOException) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo)

Example 3 with UdbPartition

Use of alluxio.table.common.UdbPartition in project alluxio by Alluxio.

The class HiveDatabase, method getTable.

/**
 * Reads {@code tableName} from the Hive metastore and converts it into a {@link UdbTable}.
 *
 * <p>All metastore calls (table, partitions, table column statistics and batched partition
 * column statistics) are made while a pooled client is held; the client is released before the
 * paths are mounted and the result is assembled. A table with no partition keys is represented
 * by a single partition named after the table; otherwise one {@link HivePartition} is created
 * per metastore partition.
 *
 * @param tableName the name of the table to load
 * @param bypassSpec forwarded to {@code mountAlluxioPaths}
 * @return the table wrapped as a {@link HiveTable}
 * @throws NotFoundException if the metastore reports that the object does not exist
 * @throws IOException if any other Thrift error occurs
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        Table hiveTable;
        List<Partition> hivePartitions;
        List<ColumnStatisticsObj> tableColumnStats;
        List<String> partitionKeyNames;
        Map<String, List<ColumnStatisticsInfo>> partitionStats = new HashMap<>();
        // Do every metastore round trip inside this scope so the pooled client goes back early.
        try (CloseableResource<IMetaStoreClient> clientResource = mClientPool.acquireClientResource()) {
            hiveTable = clientResource.get().getTable(mHiveDbName, tableName);
            // Potentially expensive call
            hivePartitions = clientResource.get()
                .listPartitions(mHiveDbName, hiveTable.getTableName(), (short) -1);
            List<String> statsColumnNames = hiveTable.getSd().getCols().stream()
                .map(FieldSchema::getName)
                .collect(Collectors.toList());
            tableColumnStats = clientResource.get()
                .getTableColumnStatistics(mHiveDbName, tableName, statsColumnNames);

            // Gather the inputs needed to look up per-partition statistics.
            List<String> dataColumnNames = hiveTable.getSd().getCols().stream()
                .map(FieldSchema::getName)
                .collect(Collectors.toList());
            partitionKeyNames = hiveTable.getPartitionKeys().stream()
                .map(FieldSchema::getName)
                .collect(Collectors.toList());
            List<String> allPartitionNames = new ArrayList<>();
            for (Partition hivePartition : hivePartitions) {
                allPartitionNames.add(
                    FileUtils.makePartName(partitionKeyNames, hivePartition.getValues()));
            }

            // Ask for partition statistics in bounded batches and convert each result to proto.
            for (List<String> nameBatch
                : Lists.partition(allPartitionNames, MAX_PARTITION_COLUMN_STATISTICS)) {
                Map<String, List<ColumnStatisticsObj>> batchStats = clientResource.get()
                    .getPartitionColumnStatistics(mHiveDbName, tableName, nameBatch, dataColumnNames);
                for (Map.Entry<String, List<ColumnStatisticsObj>> entry : batchStats.entrySet()) {
                    List<ColumnStatisticsInfo> converted = entry.getValue().stream()
                        .map(HiveUtils::toProto)
                        .collect(Collectors.toList());
                    partitionStats.put(entry.getKey(), converted);
                }
            }
        }

        PathTranslator pathTranslator = mountAlluxioPaths(hiveTable, hivePartitions, bypassSpec);
        List<ColumnStatisticsInfo> tableStatsProto = tableColumnStats.stream()
            .map(HiveUtils::toProto)
            .collect(Collectors.toList());

        // construct table layout
        PartitionInfo.Builder tableInfoBuilder = PartitionInfo.newBuilder();
        tableInfoBuilder.setDbName(getUdbContext().getDbName());
        tableInfoBuilder.setTableName(tableName);
        tableInfoBuilder.addAllDataCols(HiveUtils.toProto(hiveTable.getSd().getCols()));
        tableInfoBuilder.setStorage(HiveUtils.toProto(hiveTable.getSd(), pathTranslator));
        tableInfoBuilder.putAllParameters(hiveTable.getParameters());
        PartitionInfo tableInfo = tableInfoBuilder.build();
        Layout tableLayout = Layout.newBuilder()
            .setLayoutType(HiveLayout.TYPE)
            .setLayoutData(tableInfo.toByteString())
            .build();

        // create udb partitions info
        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionKeyNames.isEmpty()) {
            // unpartitioned table, generate a partition
            PartitionInfo.Builder soleInfoBuilder = PartitionInfo.newBuilder();
            soleInfoBuilder.setDbName(getUdbContext().getDbName());
            soleInfoBuilder.setTableName(tableName);
            soleInfoBuilder.addAllDataCols(HiveUtils.toProto(hiveTable.getSd().getCols()));
            soleInfoBuilder.setStorage(HiveUtils.toProto(hiveTable.getSd(), pathTranslator));
            soleInfoBuilder.setPartitionName(tableName);
            soleInfoBuilder.putAllParameters(hiveTable.getParameters());
            udbPartitions.add(
                new HivePartition(new HiveLayout(soleInfoBuilder.build(), Collections.emptyList())));
        } else {
            for (Partition hivePartition : hivePartitions) {
                String partitionName =
                    FileUtils.makePartName(partitionKeyNames, hivePartition.getValues());
                PartitionInfo.Builder infoBuilder = PartitionInfo.newBuilder();
                infoBuilder.setDbName(getUdbContext().getDbName());
                infoBuilder.setTableName(tableName);
                infoBuilder.addAllDataCols(HiveUtils.toProto(hivePartition.getSd().getCols()));
                infoBuilder.setStorage(HiveUtils.toProto(hivePartition.getSd(), pathTranslator));
                infoBuilder.setPartitionName(partitionName);
                infoBuilder.putAllParameters(hivePartition.getParameters());
                if (hivePartition.getValues() != null) {
                    infoBuilder.addAllValues(hivePartition.getValues());
                }
                // Partitions with no fetched statistics fall back to an empty list.
                List<ColumnStatisticsInfo> statsForPartition =
                    partitionStats.getOrDefault(partitionName, Collections.emptyList());
                udbPartitions.add(
                    new HivePartition(new HiveLayout(infoBuilder.build(), statsForPartition)));
            }
        }
        return new HiveTable(tableName, HiveUtils.toProtoSchema(hiveTable.getSd().getCols()),
            tableStatsProto, HiveUtils.toProto(hiveTable.getPartitionKeys()), udbPartitions,
            tableLayout, hiveTable);
    } catch (NoSuchObjectException e) {
        throw new NotFoundException("Table " + tableName + " does not exist.", e);
    } catch (TException e) {
        throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e);
    }
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) UdbUtils(alluxio.table.common.udb.UdbUtils) UnderDatabase(alluxio.table.common.udb.UnderDatabase) UdbPartition(alluxio.table.common.UdbPartition) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UdbContext(alluxio.table.common.udb.UdbContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) ArrayList(java.util.ArrayList) DatabaseInfo(alluxio.master.table.DatabaseInfo) PathUtils(alluxio.util.io.PathUtils) HiveClientPoolCache(alluxio.table.under.hive.util.HiveClientPoolCache) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) AbstractHiveClientPool(alluxio.table.under.hive.util.AbstractHiveClientPool) AlluxioURI(alluxio.AlluxioURI) UdbBypassSpec(alluxio.table.common.udb.UdbBypassSpec) Map(java.util.Map) UdbConfiguration(alluxio.table.common.udb.UdbConfiguration) Logger(org.slf4j.Logger) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Layout(alluxio.grpc.table.Layout) TException(org.apache.thrift.TException) AlluxioException(alluxio.exception.AlluxioException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) Objects(java.util.Objects) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) PathTranslator(alluxio.table.common.udb.PathTranslator) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbTable(alluxio.table.common.udb.UdbTable) FileUtils(org.apache.hadoop.hive.common.FileUtils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) 
HiveLayout(alluxio.table.common.layout.HiveLayout) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) TException(org.apache.thrift.TException) HiveLayout(alluxio.table.common.layout.HiveLayout) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) NotFoundException(alluxio.exception.status.NotFoundException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbPartition(alluxio.table.common.UdbPartition) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ArrayList(java.util.ArrayList) List(java.util.List) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) UdbTable(alluxio.table.common.udb.UdbTable) IOException(java.io.IOException) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)

Example 4 with UdbPartition

Use of alluxio.table.common.UdbPartition in project alluxio by Alluxio.

The class AlluxioCatalogTest, method createMockPartitionedUdbTable.

/**
 * Builds a Mockito-backed {@link UdbTable} that looks like a partitioned table.
 *
 * <p>The mock reports the given name and schema, random statistics generated for that schema,
 * one default-instance partition column, and a partition list containing the same mocked
 * {@link UdbPartition} twice. Both the table and the partition expose a {@link HiveLayout} built
 * from the default {@code PartitionInfo} instance with no column statistics.
 *
 * @param name the name returned by the table mock and used as the partition spec
 * @param schema the schema returned by the table mock and used to generate statistics
 * @return the stubbed table mock
 */
UdbTable createMockPartitionedUdbTable(String name, Schema schema) throws IOException {
    // One mocked partition is reused for both entries of the partition list.
    HiveLayout partitionLayout = new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList());
    UdbPartition sharedPartition = Mockito.mock(UdbPartition.class);
    when(sharedPartition.getSpec()).thenReturn(name);
    when(sharedPartition.getLayout()).thenReturn(partitionLayout);

    UdbTable mockTable = Mockito.mock(UdbTable.class);
    // Identity and schema.
    when(mockTable.getName()).thenReturn(name);
    when(mockTable.getSchema()).thenReturn(schema);
    when(mockTable.getStatistics()).thenReturn(createRandomStatsForSchema(schema));
    // Partitioning: two partition entries and a single partition column.
    when(mockTable.getPartitions()).thenReturn(Arrays.asList(sharedPartition, sharedPartition));
    when(mockTable.getPartitionCols()).thenReturn(Arrays.asList(FieldSchema.getDefaultInstance()));
    // Table-level layout, serialized to its proto form.
    when(mockTable.getLayout()).thenReturn(
        new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList()).toProto());
    return mockTable;
}
Also used : UdbTable(alluxio.table.common.udb.UdbTable) HiveLayout(alluxio.table.common.layout.HiveLayout) UdbPartition(alluxio.table.common.UdbPartition)

Aggregations

UdbPartition (alluxio.table.common.UdbPartition)4 HiveLayout (alluxio.table.common.layout.HiveLayout)4 UdbTable (alluxio.table.common.udb.UdbTable)4 NotFoundException (alluxio.exception.status.NotFoundException)2 ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo)2 Layout (alluxio.grpc.table.Layout)2 PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo)2 PathTranslator (alluxio.table.common.udb.PathTranslator)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 AlluxioURI (alluxio.AlluxioURI)1 AlluxioException (alluxio.exception.AlluxioException)1 DatabaseInfo (alluxio.master.table.DatabaseInfo)1 CloseableResource (alluxio.resource.CloseableResource)1 UdbBypassSpec (alluxio.table.common.udb.UdbBypassSpec)1 UdbConfiguration (alluxio.table.common.udb.UdbConfiguration)1 UdbContext (alluxio.table.common.udb.UdbContext)1 UdbUtils (alluxio.table.common.udb.UdbUtils)1