Search in sources :

Example 1 with PathTranslator

use of alluxio.table.common.udb.PathTranslator in project alluxio by Alluxio.

The method mountAlluxioPaths of the class HiveDatabase.

/**
 * Builds the {@link PathTranslator} for a Hive table by passing the table location and the
 * locations of its partitions to {@code UdbUtils.mountAlluxioPath} and recording a mapping for
 * each of them.
 *
 * <p>If the bypass spec covers the whole table, the table location is mapped to itself and the
 * partitions are not examined. Otherwise each partition that has a storage descriptor with a
 * location is processed: it is skipped when its location is not under the table location and
 * {@code Property.ALLOW_DIFF_PART_LOC_PREFIX} is not enabled, mapped to itself when the bypass
 * spec covers it, and otherwise mounted under the table's Alluxio location joined with the
 * partition name.
 *
 * @param table the Hive table whose location is mounted
 * @param partitions the partitions of {@code table}
 * @param bypassSpec decides which tables and partitions are mapped to themselves
 * @return the translator holding every mapping that was added
 * @throws IOException if mounting raises an {@link AlluxioException}
 */
private PathTranslator mountAlluxioPaths(Table table, List<Partition> partitions, UdbBypassSpec bypassSpec) throws IOException {
    String tableName = table.getTableName();
    AlluxioURI ufsUri;
    // alluxioUri and hiveUfsUri are reassigned while iterating over the partitions, so the
    // IOException built in the catch block reports the most recently processed locations.
    AlluxioURI alluxioUri = mUdbContext.getTableLocation(tableName);
    String hiveUfsUri = table.getSd().getLocation();
    try {
        PathTranslator pathTranslator = new PathTranslator();
        if (bypassSpec.hasFullTable(tableName)) {
            // whole table is bypassed: identity mapping only, no mount call
            pathTranslator.addMapping(hiveUfsUri, hiveUfsUri);
            return pathTranslator;
        }
        ufsUri = new AlluxioURI(table.getSd().getLocation());
        pathTranslator.addMapping(UdbUtils.mountAlluxioPath(tableName, ufsUri, alluxioUri, mUdbContext, mConfiguration), hiveUfsUri);
        for (Partition part : partitions) {
            AlluxioURI partitionUri;
            if (part.getSd() != null && part.getSd().getLocation() != null) {
                partitionUri = new AlluxioURI(part.getSd().getLocation());
                if (!mConfiguration.getBoolean(Property.ALLOW_DIFF_PART_LOC_PREFIX) && !ufsUri.isAncestorOf(partitionUri)) {
                    // partition lives outside the table location and that is not allowed
                    continue;
                }
                hiveUfsUri = part.getSd().getLocation();
                // fall back to the raw value list if a proper partition name cannot be built
                String partName = part.getValues().toString();
                try {
                    partName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues());
                } catch (MetaException e) {
                    // keep going with the fallback name, but log the cause as well
                    LOG.warn("Error making partition name for table {}, partition {}", tableName, part.getValues().toString(), e);
                }
                if (bypassSpec.hasPartition(tableName, partName)) {
                    // bypassed partition: identity mapping on the path component, no mount call
                    pathTranslator.addMapping(partitionUri.getPath(), partitionUri.getPath());
                    continue;
                }
                alluxioUri = new AlluxioURI(PathUtils.concatPath(mUdbContext.getTableLocation(tableName).getPath(), partName));
                // mount partition path if it is not already mounted as part of the table path mount
                pathTranslator.addMapping(UdbUtils.mountAlluxioPath(tableName, partitionUri, alluxioUri, mUdbContext, mConfiguration), hiveUfsUri);
            }
        }
        return pathTranslator;
    } catch (AlluxioException e) {
        throw new IOException("Failed to mount table location. tableName: " + tableName + " hiveUfsLocation: " + hiveUfsUri + " AlluxioLocation: " + alluxioUri + " error: " + e.getMessage(), e);
    }
}
Also used : UdbPartition(alluxio.table.common.UdbPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) PathTranslator(alluxio.table.common.udb.PathTranslator) IOException(java.io.IOException) AlluxioURI(alluxio.AlluxioURI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) AlluxioException(alluxio.exception.AlluxioException)

Example 2 with PathTranslator

use of alluxio.table.common.udb.PathTranslator in project alluxio by Alluxio.

The method mountAlluxioPaths of the class GlueDatabase.

/**
 * Builds the {@link PathTranslator} for a Glue table by passing the table location and the
 * locations of its partitions to {@code UdbUtils.mountAlluxioPath} and recording a mapping for
 * each of them.
 *
 * <p>If the bypass spec covers the whole table, the table location is mapped to itself and the
 * partitions are not examined. Otherwise only partitions that have a storage descriptor with a
 * location under the table location are processed: a partition covered by the bypass spec is
 * mapped to itself, any other one is mounted under the table's Alluxio location joined with the
 * partition name.
 *
 * @param table the Glue table whose location is mounted
 * @param partitions the partitions of {@code table}
 * @param bypassSpec decides which tables and partitions are mapped to themselves
 * @return the translator holding every mapping that was added
 * @throws IOException if mounting raises an {@link AlluxioException}
 */
@VisibleForTesting
private PathTranslator mountAlluxioPaths(Table table, List<Partition> partitions, UdbBypassSpec bypassSpec) throws IOException {
    String tableName = table.getName();
    AlluxioURI ufsUri;
    // alluxioUri and glueUfsUri are reassigned while iterating over the partitions, so the
    // IOException built in the catch block reports the most recently processed locations.
    AlluxioURI alluxioUri = mUdbContext.getTableLocation(tableName);
    String glueUfsUri = table.getStorageDescriptor().getLocation();
    try {
        PathTranslator pathTranslator = new PathTranslator();
        if (bypassSpec.hasFullTable(tableName)) {
            // whole table is bypassed: identity mapping only, no mount call
            pathTranslator.addMapping(glueUfsUri, glueUfsUri);
            return pathTranslator;
        }
        ufsUri = new AlluxioURI(table.getStorageDescriptor().getLocation());
        pathTranslator.addMapping(UdbUtils.mountAlluxioPath(tableName, ufsUri, alluxioUri, mUdbContext, mGlueConfiguration), glueUfsUri);
        for (Partition partition : partitions) {
            if (partition.getStorageDescriptor() == null || partition.getStorageDescriptor().getLocation() == null) {
                // nothing to mount for a partition without a location
                continue;
            }
            AlluxioURI partitionUri = new AlluxioURI(partition.getStorageDescriptor().getLocation());
            if (!ufsUri.isAncestorOf(partitionUri)) {
                // partition lives outside the table location
                continue;
            }
            glueUfsUri = partition.getStorageDescriptor().getLocation();
            // fall back to the raw value list if a proper partition name cannot be built
            String partitionName = partition.getValues().toString();
            try {
                partitionName = GlueUtils.makePartitionName(table.getPartitionKeys(), partition.getValues());
            } catch (IOException e) {
                // keep going with the fallback name, but log the cause as well
                LOG.warn("Error making partition name for table {}, partition {} in database {} with CatalogID {}.", tableName, partition.getValues().toString(), mGlueDbName, mGlueConfiguration.get(Property.CATALOG_ID), e);
            }
            if (bypassSpec.hasPartition(tableName, partitionName)) {
                // bypassed partition: identity mapping on the path component, no mount call
                pathTranslator.addMapping(partitionUri.getPath(), partitionUri.getPath());
                continue;
            }
            alluxioUri = new AlluxioURI(PathUtils.concatPath(mUdbContext.getTableLocation(tableName).getPath(), partitionName));
            // mount partition path if it is not already mounted as part of the table path mount
            pathTranslator.addMapping(UdbUtils.mountAlluxioPath(tableName, partitionUri, alluxioUri, mUdbContext, mGlueConfiguration), glueUfsUri);
        }
        return pathTranslator;
    } catch (AlluxioException e) {
        throw new IOException("Failed to mount table location. tableName: " + tableName + " glueUfsLocation: " + glueUfsUri + " AlluxioLocation: " + alluxioUri + " error: " + e.getMessage(), e);
    }
}
Also used : UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) PathTranslator(alluxio.table.common.udb.PathTranslator) IOException(java.io.IOException) AlluxioURI(alluxio.AlluxioURI) AlluxioException(alluxio.exception.AlluxioException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with PathTranslator

use of alluxio.table.common.udb.PathTranslator in project alluxio by Alluxio.

The method getTable of the class GlueDatabase.

/**
 * Loads a table from the Glue catalog and converts it into a {@link UdbTable}.
 *
 * <p>The table and its partitions are fetched, their locations are run through
 * {@code mountAlluxioPaths}, table and partition column statistics are collected when the
 * corresponding properties are enabled, and a {@link GlueTable} is assembled from the result.
 * A table without partition columns is represented by a single partition named after the table.
 *
 * @param tableName the name of the table to load
 * @param bypassSpec forwarded to {@code mountAlluxioPaths}
 * @return the table as a {@link GlueTable}
 * @throws IOException on a Glue validation or encryption error; a missing table is reported
 *         as a {@link NotFoundException}
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        String catalogId = mGlueConfiguration.get(Property.CATALOG_ID);
        GetTableRequest tableRequest = new GetTableRequest()
            .withCatalogId(catalogId)
            .withDatabaseName(mGlueDbName)
            .withName(tableName);
        Table table = getClient().getTable(tableRequest).getTable();
        List<Partition> partitions = batchGetPartitions(getClient(), tableName);
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

        // A table without partition keys is treated as having no partition columns.
        List<Column> partitionColumns = table.getPartitionKeys() == null
            ? Collections.emptyList()
            : table.getPartitionKeys();

        // Table parameters, never null from here on.
        Map<String, String> tableParameters;
        if (table.getParameters() == null) {
            tableParameters = Collections.emptyMap();
        } else {
            tableParameters = table.getParameters();
        }

        // Table-level column statistics.
        List<String> columnNames = table.getStorageDescriptor().getColumns().stream()
            .map(Column::getName)
            .collect(Collectors.toList());
        GetColumnStatisticsForTableRequest tableStatsRequest = new GetColumnStatisticsForTableRequest()
            .withCatalogId(catalogId)
            .withDatabaseName(mGlueDbName)
            .withTableName(tableName)
            .withColumnNames(columnNames);
        List<ColumnStatisticsInfo> tableColumnStats = new ArrayList<>();
        if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
            tableColumnStats = getTableColumnStatistics(mGlueDbName, tableName, tableStatsRequest);
        }

        // Partition-level column statistics, keyed by partition name.
        // Potentially expensive: one request per partition.
        Map<String, List<ColumnStatisticsInfo>> partitionStats = new HashMap<>();
        if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
            for (Partition partition : partitions) {
                List<String> values = partition.getValues();
                if (values == null) {
                    continue;
                }
                GetColumnStatisticsForPartitionRequest partitionStatsRequest = new GetColumnStatisticsForPartitionRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(mGlueDbName)
                    .withTableName(tableName)
                    .withColumnNames(columnNames)
                    .withPartitionValues(values);
                String statsKey = GlueUtils.makePartitionName(partitionColumns, values);
                partitionStats.put(statsKey, getPartitionColumnStatistics(mGlueDbName, tableName, partitionStatsRequest));
            }
        }

        // Table layout.
        PartitionInfo tableInfo = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
            .putAllParameters(tableParameters)
            .build();
        Layout layout = Layout.newBuilder()
            .setLayoutType(HiveLayout.TYPE)
            .setLayoutData(tableInfo.toByteString())
            .build();

        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (!partitionColumns.isEmpty()) {
            for (Partition partition : partitions) {
                String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                PartitionInfo.Builder builder = PartitionInfo.newBuilder()
                    .setDbName(mGlueDbName)
                    .setTableName(tableName)
                    .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
                    .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
                    .setPartitionName(partName)
                    .putAllParameters(partition.getParameters() == null ? Collections.emptyMap() : partition.getParameters());
                if (partition.getValues() != null) {
                    builder.addAllValues(partition.getValues());
                }
                List<ColumnStatisticsInfo> stats = partitionStats.getOrDefault(partName, Collections.emptyList());
                udbPartitions.add(new GluePartition(new HiveLayout(builder.build(), stats)));
            }
        } else {
            // Unpartitioned table: a single partition named after the table, without statistics.
            PartitionInfo.Builder builder = PartitionInfo.newBuilder()
                .setDbName(mGlueDbName)
                .setTableName(tableName)
                .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                .setPartitionName(tableName)
                .putAllParameters(tableParameters);
            udbPartitions.add(new GluePartition(new HiveLayout(builder.build(), Collections.emptyList())));
        }

        return new GlueTable(
            this,
            pathTranslator,
            tableName,
            GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
            tableColumnStats,
            // Get FieldSchema from partition keys
            GlueUtils.toProto(table.getPartitionKeys()),
            udbPartitions,
            layout,
            table);
    } catch (EntityNotFoundException e) {
        throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
    } catch (ValidationException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
    } catch (GlueEncryptionException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
    }
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) ValidationException(com.amazonaws.services.glue.model.ValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) NotFoundException(alluxio.exception.status.NotFoundException) UdbPartition(alluxio.table.common.UdbPartition) Column(com.amazonaws.services.glue.model.Column) GlueEncryptionException(com.amazonaws.services.glue.model.GlueEncryptionException) List(java.util.List) ArrayList(java.util.ArrayList) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) UdbTable(alluxio.table.common.udb.UdbTable) Table(com.amazonaws.services.glue.model.Table) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) IOException(java.io.IOException) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo)

Example 4 with PathTranslator

use of alluxio.table.common.udb.PathTranslator in project alluxio by Alluxio.

The method getTable of the class HiveDatabase.

/**
 * Loads a table from the Hive metastore and converts it into a {@link UdbTable}.
 *
 * <p>All metastore calls (table, partitions, table and partition column statistics) are made
 * while a pooled client is held; the client is released before the locations are run through
 * {@code mountAlluxioPaths} and the {@link HiveTable} is assembled. A table without partition
 * columns is represented by a single partition named after the table.
 *
 * @param tableName the name of the table to load
 * @param bypassSpec forwarded to {@code mountAlluxioPaths}
 * @return the table as a {@link HiveTable}
 * @throws IOException if the metastore call fails with a {@link TException}; a missing table is
 *         reported as a {@link NotFoundException}
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    try {
        Table table;
        List<Partition> partitions;
        List<ColumnStatisticsObj> columnStats;
        List<String> partitionColumns;
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        // perform all the hive client operations, and release the client early.
        try (CloseableResource<IMetaStoreClient> client = mClientPool.acquireClientResource()) {
            table = client.get().getTable(mHiveDbName, tableName);
            // Potentially expensive call
            partitions = client.get().listPartitions(mHiveDbName, table.getTableName(), (short) -1);
            // data column names, computed once and used for both the table-level and the
            // partition-level statistics lookups
            List<String> dataColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
            columnStats = client.get().getTableColumnStatistics(mHiveDbName, tableName, dataColumns);
            // construct the partition statistics
            partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(Collectors.toList());
            List<String> partitionNames = partitions.stream().map(partition -> FileUtils.makePartName(partitionColumns, partition.getValues())).collect(Collectors.toList());
            // statistics are requested in batches of at most MAX_PARTITION_COLUMN_STATISTICS names
            for (List<String> partialPartitionNames : Lists.partition(partitionNames, MAX_PARTITION_COLUMN_STATISTICS)) {
                statsMap.putAll(client.get().getPartitionColumnStatistics(mHiveDbName, tableName, partialPartitionNames, dataColumns).entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().stream().map(HiveUtils::toProto).collect(Collectors.toList()), (e1, e2) -> e2)));
            }
        }
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);
        List<ColumnStatisticsInfo> colStats = columnStats.stream().map(HiveUtils::toProto).collect(Collectors.toList());
        // never hand a null parameter map to the PartitionInfo builder
        Map<String, String> tableParameters = table.getParameters() == null ? Collections.emptyMap() : table.getParameters();
        // construct table layout
        PartitionInfo partitionInfo = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(table.getSd().getCols())).setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)).putAllParameters(tableParameters).build();
        Layout layout = Layout.newBuilder().setLayoutType(HiveLayout.TYPE).setLayoutData(partitionInfo.toByteString()).build();
        // create udb partitions info
        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            // unpartitioned table, generate a partition
            PartitionInfo.Builder pib = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(table.getSd().getCols())).setStorage(HiveUtils.toProto(table.getSd(), pathTranslator)).setPartitionName(tableName).putAllParameters(tableParameters);
            udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                String partName = FileUtils.makePartName(partitionColumns, partition.getValues());
                // same null guard as for the table parameters
                Map<String, String> partitionParameters = partition.getParameters() == null ? Collections.emptyMap() : partition.getParameters();
                PartitionInfo.Builder pib = PartitionInfo.newBuilder().setDbName(getUdbContext().getDbName()).setTableName(tableName).addAllDataCols(HiveUtils.toProto(partition.getSd().getCols())).setStorage(HiveUtils.toProto(partition.getSd(), pathTranslator)).setPartitionName(partName).putAllParameters(partitionParameters);
                if (partition.getValues() != null) {
                    pib.addAllValues(partition.getValues());
                }
                udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new HiveTable(tableName, HiveUtils.toProtoSchema(table.getSd().getCols()), colStats, HiveUtils.toProto(table.getPartitionKeys()), udbPartitions, layout, table);
    } catch (NoSuchObjectException e) {
        throw new NotFoundException("Table " + tableName + " does not exist.", e);
    } catch (TException e) {
        throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e);
    }
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) UdbUtils(alluxio.table.common.udb.UdbUtils) UnderDatabase(alluxio.table.common.udb.UnderDatabase) UdbPartition(alluxio.table.common.UdbPartition) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UdbContext(alluxio.table.common.udb.UdbContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) ArrayList(java.util.ArrayList) DatabaseInfo(alluxio.master.table.DatabaseInfo) PathUtils(alluxio.util.io.PathUtils) HiveClientPoolCache(alluxio.table.under.hive.util.HiveClientPoolCache) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) AbstractHiveClientPool(alluxio.table.under.hive.util.AbstractHiveClientPool) AlluxioURI(alluxio.AlluxioURI) UdbBypassSpec(alluxio.table.common.udb.UdbBypassSpec) Map(java.util.Map) UdbConfiguration(alluxio.table.common.udb.UdbConfiguration) Logger(org.slf4j.Logger) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Layout(alluxio.grpc.table.Layout) TException(org.apache.thrift.TException) AlluxioException(alluxio.exception.AlluxioException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) Objects(java.util.Objects) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) PathTranslator(alluxio.table.common.udb.PathTranslator) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbTable(alluxio.table.common.udb.UdbTable) FileUtils(org.apache.hadoop.hive.common.FileUtils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) 
HiveLayout(alluxio.table.common.layout.HiveLayout) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) TException(org.apache.thrift.TException) HiveLayout(alluxio.table.common.layout.HiveLayout) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) NotFoundException(alluxio.exception.status.NotFoundException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UdbPartition(alluxio.table.common.UdbPartition) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ArrayList(java.util.ArrayList) List(java.util.List) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) UdbTable(alluxio.table.common.udb.UdbTable) IOException(java.io.IOException) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)

Aggregations

UdbPartition (alluxio.table.common.UdbPartition)4 PathTranslator (alluxio.table.common.udb.PathTranslator)4 IOException (java.io.IOException)4 AlluxioURI (alluxio.AlluxioURI)3 AlluxioException (alluxio.exception.AlluxioException)3 NotFoundException (alluxio.exception.status.NotFoundException)2 ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo)2 Layout (alluxio.grpc.table.Layout)2 PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo)2 HiveLayout (alluxio.table.common.layout.HiveLayout)2 UdbTable (alluxio.table.common.udb.UdbTable)2 Partition (com.amazonaws.services.glue.model.Partition)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 DatabaseInfo (alluxio.master.table.DatabaseInfo)1 CloseableResource (alluxio.resource.CloseableResource)1 UdbBypassSpec (alluxio.table.common.udb.UdbBypassSpec)1 UdbConfiguration (alluxio.table.common.udb.UdbConfiguration)1