Search in sources :

Example 1 with UdbBypassSpec

use of alluxio.table.common.udb.UdbBypassSpec in project alluxio by Alluxio.

the class DbConfigTest method convertToUdbBypassSpec.

@Test
public void convertToUdbBypassSpec() throws Exception {
    DbConfig.BypassEntry entry = mMapper.readValue("{\"tables\": [\"table1\"]}", DbConfig.BypassEntry.class);
    assertEquals(ImmutableSet.of("table1"), entry.getBypassedTables());
    UdbBypassSpec spec = entry.toUdbBypassSpec();
    assertTrue(spec.hasTable("table1"));
}
Also used : UdbBypassSpec(alluxio.table.common.udb.UdbBypassSpec) Test(org.junit.Test)

Example 2 with UdbBypassSpec

use of alluxio.table.common.udb.UdbBypassSpec in project alluxio by Alluxio.

the class HiveDatabase method getTable.

@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        Table table;
        List<Partition> partitions;
        List<ColumnStatisticsObj> columnStats;
        List<String> partitionColumns;
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        // perform all the hive client operations, and release the client early.
        try (CloseableResource<IMetaStoreClient> client = mClientPool.acquireClientResource()) {
            table = client.get().getTable(mHiveDbName, tableName);
            // Potentially expensive call
            partitions = client.get().listPartitions(mHiveDbName, table.getTableName(), (short) -1);
            List<String> colNames = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
            columnStats = client.get().getTableColumnStatistics(mHiveDbName, tableName, colNames);
            // construct the partition statistics
            List<String> dataColumns = table.getSd().getCols().stream().map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName).collect(Collectors.toList());
            partitionColumns = table.getPartitionKeys().stream().map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName).collect(Collectors.toList());
            List<String> partitionNames = partitions.stream().map(partition -> FileUtils.makePartName(partitionColumns, partition.getValues())).collect(Collectors.toList());
            for (List<String> partialPartitionNames : Lists.partition(partitionNames, MAX_PARTITION_COLUMN_STATISTICS)) {
                statsMap.putAll(client.get().getPartitionColumnStatistics(mHiveDbName, tableName, partialPartitionNames, dataColumns).entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().stream().map(HiveUtils::toProto).collect(Collectors.toList()), (e1, e2) -> e2)));
            }
        }
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);
        List<ColumnStatisticsInfo> colStats = columnStats.stream().map(HiveUtils::toProto).collect(Collectors.toList());
        // construct table layout
        PartitionInfo partitionInfo = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(table.getSd().getCols())).setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)).putAllParameters(table.getParameters()).build();
        Layout layout = Layout.newBuilder().setLayoutType(HiveLayout.TYPE).setLayoutData(partitionInfo.toByteString()).build();
        // create udb partitions info
        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            // unpartitioned table, generate a partition
            PartitionInfo.Builder pib = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(table.getSd().getCols())).setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)).setPartitionName(tableName).putAllParameters(table.getParameters());
            udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                String partName = FileUtils.makePartName(partitionColumns, partition.getValues());
                PartitionInfo.Builder pib = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(partition.getSd().getCols())).setStorage(HiveUtils.toProto(partition.getSd(), pathTranslator)).setPartitionName(partName).putAllParameters(partition.getParameters());
                if (partition.getValues() != null) {
                    pib.addAllValues(partition.getValues());
                }
                udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new HiveTable(tableName, HiveUtils.toProtoSchema(table.getSd().getCols()), colStats, HiveUtils.toProto(table.getPartitionKeys()), udbPartitions, layout, table);
    } catch (NoSuchObjectException e) {
        throw new NotFoundException("Table " + tableName + " does not exist.", e);
    } catch (TException e) {
        throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e);
    }
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) UdbUtils(alluxio.table.common.udb.UdbUtils) UnderDatabase(alluxio.table.common.udb.UnderDatabase) UdbPartition(alluxio.table.common.UdbPartition) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UdbContext(alluxio.table.common.udb.UdbContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) ArrayList(java.util.ArrayList) DatabaseInfo(alluxio.master.table.DatabaseInfo) PathUtils(alluxio.util.io.PathUtils) HiveClientPoolCache(alluxio.table.under.hive.util.HiveClientPoolCache) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) AbstractHiveClientPool(alluxio.table.under.hive.util.AbstractHiveClientPool) AlluxioURI(alluxio.AlluxioURI) UdbBypassSpec(alluxio.table.common.udb.UdbBypassSpec) Map(java.util.Map) UdbConfiguration(alluxio.table.common.udb.UdbConfiguration) Logger(org.slf4j.Logger) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Layout(alluxio.grpc.table.Layout) TException(org.apache.thrift.TException) AlluxioException(alluxio.exception.AlluxioException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) Objects(java.util.Objects) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) PathTranslator(alluxio.table.common.udb.PathTranslator) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbTable(alluxio.table.common.udb.UdbTable) FileUtils(org.apache.hadoop.hive.common.FileUtils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveLayout(alluxio.table.common.layout.HiveLayout) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) TException(org.apache.thrift.TException) HiveLayout(alluxio.table.common.layout.HiveLayout) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) NotFoundException(alluxio.exception.status.NotFoundException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbPartition(alluxio.table.common.UdbPartition) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ArrayList(java.util.ArrayList) List(java.util.List) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) UdbTable(alluxio.table.common.udb.UdbTable) IOException(java.io.IOException) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)

Aggregations

UdbBypassSpec (alluxio.table.common.udb.UdbBypassSpec)2 AlluxioURI (alluxio.AlluxioURI)1 AlluxioException (alluxio.exception.AlluxioException)1 NotFoundException (alluxio.exception.status.NotFoundException)1 ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo)1 Layout (alluxio.grpc.table.Layout)1 PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo)1 DatabaseInfo (alluxio.master.table.DatabaseInfo)1 CloseableResource (alluxio.resource.CloseableResource)1 UdbPartition (alluxio.table.common.UdbPartition)1 HiveLayout (alluxio.table.common.layout.HiveLayout)1 PathTranslator (alluxio.table.common.udb.PathTranslator)1 UdbConfiguration (alluxio.table.common.udb.UdbConfiguration)1 UdbContext (alluxio.table.common.udb.UdbContext)1 UdbTable (alluxio.table.common.udb.UdbTable)1 UdbUtils (alluxio.table.common.udb.UdbUtils)1 UnderDatabase (alluxio.table.common.udb.UnderDatabase)1 AbstractHiveClientPool (alluxio.table.under.hive.util.AbstractHiveClientPool)1 HiveClientPoolCache (alluxio.table.under.hive.util.HiveClientPoolCache)1 PathUtils (alluxio.util.io.PathUtils)1