Search in sources :

Example 1 with Column

use of com.amazonaws.services.glue.model.Column in project alluxio by Alluxio.

the class GlueUtils method makePartitionName.

/**
 * Align to hive makePartName, convert glue partition information to alluxio partition name.
 *
 * <p>The partition keys and values must both be present, non-empty and of equal length;
 * otherwise the input is rejected with an {@link IOException} describing the keys and
 * values that were supplied.
 *
 * @param columns glue table partition keys
 * @param partitionValues glue partition values
 * @return partition name, as produced by {@code makePartName} from the key names and values
 * @throws IOException if either list is null, if there are no partition keys, or if the
 *         number of keys differs from the number of values
 */
public static String makePartitionName(List<Column> columns, List<String> partitionValues) throws IOException {
    // Null lists are treated as invalid input so callers get the documented IOException
    // rather than a NullPointerException from size().
    boolean invalid = columns == null
            || partitionValues == null
            || columns.isEmpty()
            || columns.size() != partitionValues.size();
    if (invalid) {
        StringBuilder errorMesg = new StringBuilder("Invalid partition key & values; key [");
        if (columns != null) {
            for (Column column : columns) {
                errorMesg.append(column.getName()).append(",");
            }
        }
        errorMesg.append("], values [");
        if (partitionValues != null) {
            for (String partitionValue : partitionValues) {
                errorMesg.append(partitionValue).append(", ");
            }
        }
        errorMesg.append("]");
        throw new IOException(errorMesg.toString());
    }
    // Only the key names are needed to build the partition name.
    List<String> columnNames = new ArrayList<>(columns.size());
    for (Column column : columns) {
        columnNames.add(column.getName());
    }
    return makePartName(columnNames, partitionValues);
}
Also used : SortingColumn(alluxio.grpc.table.layout.hive.SortingColumn) Column(com.amazonaws.services.glue.model.Column) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException)

Example 2 with Column

use of com.amazonaws.services.glue.model.Column in project alluxio by Alluxio.

the class GlueDatabase method getTable.

/**
 * Loads a Glue table and its partitions and wraps them as a {@link UdbTable}.
 *
 * <p>The method fetches the table and its partitions from Glue, passes them to
 * {@code mountAlluxioPaths} to obtain a {@link PathTranslator}, optionally collects table-level
 * and partition-level column statistics (each controlled by its own configuration property),
 * and builds the Hive layout plus one {@link UdbPartition} per Glue partition. A table without
 * partition keys is represented by a single partition named after the table.
 *
 * @param tableName name of the table to look up in this Glue database
 * @param bypassSpec bypass specification handed to {@code mountAlluxioPaths}
 * @return the table representation, including schema, statistics, partitions and layout
 * @throws NotFoundException if Glue reports that the table does not exist
 * @throws IOException if Glue reports a validation or encryption error, or a helper fails
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    Table table;
    List<Partition> partitions;
    try {
        GetTableRequest tableRequest = new GetTableRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withName(tableName);
        table = getClient().getTable(tableRequest).getTable();
        partitions = batchGetPartitions(getClient(), tableName);
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

        // Missing partition keys are treated as "no partition columns".
        List<Column> partitionColumns = table.getPartitionKeys() == null
                ? Collections.emptyList()
                : table.getPartitionKeys();

        // Get table parameters
        Map<String, String> tableParameters;
        if (table.getParameters() == null) {
            tableParameters = Collections.emptyMap();
        } else {
            tableParameters = table.getParameters();
        }

        // Get column statistics info for table
        List<String> columnNames = new ArrayList<>();
        for (Column dataColumn : table.getStorageDescriptor().getColumns()) {
            columnNames.add(dataColumn.getName());
        }
        GetColumnStatisticsForTableRequest tableStatsRequest = new GetColumnStatisticsForTableRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withTableName(tableName)
                .withColumnNames(columnNames);
        List<ColumnStatisticsInfo> columnStatisticsTableData = new ArrayList<>();
        if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
            columnStatisticsTableData = getTableColumnStatistics(mGlueDbName, tableName, tableStatsRequest);
        }

        // Get column statistics info for partitions
        // potential expensive call
        Map<String, List<ColumnStatisticsInfo>> statsByPartitionName = new HashMap<>();
        if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
            for (Partition partition : partitions) {
                List<String> values = partition.getValues();
                if (values == null) {
                    // Partitions without values are skipped for statistics collection.
                    continue;
                }
                GetColumnStatisticsForPartitionRequest partitionStatsRequest =
                        new GetColumnStatisticsForPartitionRequest()
                                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                                .withDatabaseName(mGlueDbName)
                                .withTableName(tableName)
                                .withColumnNames(columnNames)
                                .withPartitionValues(values);
                String statsPartName = GlueUtils.makePartitionName(partitionColumns, values);
                statsByPartitionName.put(statsPartName,
                        getPartitionColumnStatistics(mGlueDbName, tableName, partitionStatsRequest));
            }
        }

        // Table-level layout built from the table's own storage descriptor.
        PartitionInfo tableInfo = PartitionInfo.newBuilder()
                .setDbName(mGlueDbName)
                .setTableName(tableName)
                .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                .putAllParameters(tableParameters)
                .build();
        Layout layout = Layout.newBuilder()
                .setLayoutType(HiveLayout.TYPE)
                .setLayoutData(tableInfo.toByteString())
                .build();

        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (!partitionColumns.isEmpty()) {
            for (Partition partition : partitions) {
                String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                PartitionInfo.Builder perPartitionBuilder = PartitionInfo.newBuilder()
                        .setDbName(mGlueDbName)
                        .setTableName(tableName)
                        .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
                        .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
                        .setPartitionName(partName)
                        .putAllParameters(partition.getParameters() == null
                                ? Collections.emptyMap()
                                : partition.getParameters());
                if (partition.getValues() != null) {
                    perPartitionBuilder.addAllValues(partition.getValues());
                }
                List<ColumnStatisticsInfo> partitionStats =
                        statsByPartitionName.getOrDefault(partName, Collections.emptyList());
                udbPartitions.add(new GluePartition(new HiveLayout(perPartitionBuilder.build(), partitionStats)));
            }
        } else {
            // Unpartitioned table: expose a single partition named after the table itself.
            PartitionInfo wholeTableInfo = PartitionInfo.newBuilder()
                    .setDbName(mGlueDbName)
                    .setTableName(tableName)
                    .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                    .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                    .setPartitionName(tableName)
                    .putAllParameters(tableParameters)
                    .build();
            udbPartitions.add(new GluePartition(new HiveLayout(wholeTableInfo, Collections.emptyList())));
        }

        return new GlueTable(
                this,
                pathTranslator,
                tableName,
                GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
                columnStatisticsTableData,
                // Get FieldSchema from partition keys
                GlueUtils.toProto(table.getPartitionKeys()),
                udbPartitions,
                layout,
                table);
    } catch (EntityNotFoundException e) {
        String message = "Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".";
        throw new NotFoundException(message, e);
    } catch (ValidationException e) {
        String message = "Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage();
        throw new IOException(message, e);
    } catch (GlueEncryptionException e) {
        String message = "Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage();
        throw new IOException(message, e);
    }
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) ValidationException(com.amazonaws.services.glue.model.ValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) NotFoundException(alluxio.exception.status.NotFoundException) UdbPartition(alluxio.table.common.UdbPartition) Column(com.amazonaws.services.glue.model.Column) GlueEncryptionException(com.amazonaws.services.glue.model.GlueEncryptionException) List(java.util.List) ArrayList(java.util.ArrayList) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) UdbTable(alluxio.table.common.udb.UdbTable) Table(com.amazonaws.services.glue.model.Table) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) IOException(java.io.IOException) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo)

Example 3 with Column

use of com.amazonaws.services.glue.model.Column in project alluxio by Alluxio.

the class GlueUtils method toProto.

/**
 * Converts a list of Glue columns into the equivalent Alluxio FieldSchema messages.
 *
 * <p>Each column's name and type are always copied; the comment is copied only when the
 * Glue column has one.
 *
 * @param glueCloumns Glue columns to convert, may be null
 * @return list of Alluxio FieldSchema in the same order as the input; an empty list when the
 *         input is null
 */
public static List<alluxio.grpc.table.FieldSchema> toProto(List<Column> glueCloumns) {
    if (glueCloumns == null) {
        return Collections.emptyList();
    }
    List<alluxio.grpc.table.FieldSchema> fieldSchemas = new ArrayList<>();
    for (Column glueColumn : glueCloumns) {
        alluxio.grpc.table.FieldSchema.Builder fieldBuilder = alluxio.grpc.table.FieldSchema.newBuilder();
        fieldBuilder.setName(glueColumn.getName());
        fieldBuilder.setType(glueColumn.getType());
        // The comment is optional on the Glue side, so it is only set when present.
        String comment = glueColumn.getComment();
        if (comment != null) {
            fieldBuilder.setComment(comment);
        }
        fieldSchemas.add(fieldBuilder.build());
    }
    return fieldSchemas;
}
Also used : SortingColumn(alluxio.grpc.table.layout.hive.SortingColumn) Column(com.amazonaws.services.glue.model.Column) ArrayList(java.util.ArrayList)

Aggregations

Column (com.amazonaws.services.glue.model.Column)3 ArrayList (java.util.ArrayList)3 SortingColumn (alluxio.grpc.table.layout.hive.SortingColumn)2 IOException (java.io.IOException)2 NotFoundException (alluxio.exception.status.NotFoundException)1 ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo)1 Layout (alluxio.grpc.table.Layout)1 PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo)1 UdbPartition (alluxio.table.common.UdbPartition)1 HiveLayout (alluxio.table.common.layout.HiveLayout)1 PathTranslator (alluxio.table.common.udb.PathTranslator)1 UdbTable (alluxio.table.common.udb.UdbTable)1 EntityNotFoundException (com.amazonaws.services.glue.model.EntityNotFoundException)1 GetColumnStatisticsForPartitionRequest (com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest)1 GetColumnStatisticsForTableRequest (com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest)1 GetTableRequest (com.amazonaws.services.glue.model.GetTableRequest)1 GlueEncryptionException (com.amazonaws.services.glue.model.GlueEncryptionException)1 Partition (com.amazonaws.services.glue.model.Partition)1 Table (com.amazonaws.services.glue.model.Table)1 ValidationException (com.amazonaws.services.glue.model.ValidationException)1