use of alluxio.grpc.table.layout.hive.PartitionInfo in project alluxio by Alluxio.
the class HiveLayout method transformLayout.
/**
 * Builds the layout that describes this partition once its data has been transformed and
 * written to {@code transformedUri}.
 *
 * <p>The result is a copy of this layout's partition info whose storage format is switched to
 * the Parquet serde, input format and output format, and whose storage location is the
 * transformed URI. The existing column statistics are carried over unchanged.
 *
 * @param transformedUri location of the transformed data
 * @param definition transform definition whose properties may name a Parquet compression codec
 * @return a new layout pointing at the transformed data
 */
private HiveLayout transformLayout(AlluxioURI transformedUri, TransformDefinition definition) {
  final Properties transformProps = definition.getProperties();

  // TODO(cc): assumption here is the transformed data is in Parquet format.
  final StorageFormat.Builder parquetFormat =
      mPartitionInfo.getStorage().getStorageFormat().toBuilder();
  parquetFormat.setSerde(HiveConstants.PARQUET_SERDE_CLASS);
  parquetFormat.setInputFormat(HiveConstants.PARQUET_INPUT_FORMAT_CLASS);
  parquetFormat.setOutputFormat(HiveConstants.PARQUET_OUTPUT_FORMAT_CLASS);

  // Forward the compression setting only when the definition actually provides one.
  final String compressionKey = alluxio.job.plan.transform.PartitionInfo.PARQUET_COMPRESSION;
  final String compression = transformProps.getProperty(compressionKey);
  if (!StringUtils.isEmpty(compression)) {
    parquetFormat.putSerdelibParameters(compressionKey, compression);
  }

  final PartitionInfo.Builder transformed = mPartitionInfo.toBuilder();
  transformed.putAllParameters(mPartitionInfo.getParametersMap());
  transformed.setStorage(
      mPartitionInfo.getStorage().toBuilder()
          .setStorageFormat(parquetFormat.build())
          .setLocation(transformedUri.toString())
          .build());
  PartitionInfo info = transformed.build();

  List<ColumnStatisticsInfo> stats = new ArrayList<>(mPartitionStatsInfo.values());
  return new HiveLayout(info, stats);
}
use of alluxio.grpc.table.layout.hive.PartitionInfo in project presto by prestodb.
the class AlluxioHiveMetastore method getPartitionNamesByParts.
/**
 * Returns the names of the table's partitions whose values match {@code parts}.
 *
 * <p>A partition matches when it has exactly as many values as {@code parts} has entries and,
 * position by position, every non-empty entry of {@code parts} equals the partition's value at
 * that position. An empty string in {@code parts} therefore matches any value.
 *
 * @param metastoreContext metastore context (not read by this method)
 * @param databaseName database name
 * @param tableName table name
 * @param parts list of values which returned partitions should contain
 * @return the matching partition names as reported by {@code PartitionInfo#getPartitionName},
 *         always present (possibly an empty list)
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if Alluxio reports a status error
 */
public Optional<List<String>> getPartitionNamesByParts(MetastoreContext metastoreContext, String databaseName, String tableName, List<String> parts) {
  try {
    List<PartitionInfo> allPartitions = AlluxioProtoUtils.toPartitionInfoList(
        client.readTable(databaseName, tableName, Constraint.getDefaultInstance()));

    List<String> matchingNames = allPartitions.stream()
        // TODO also check for database name equality
        .filter(candidate -> candidate.getTableName().equals(tableName))
        .filter(candidate -> {
          List<String> actual = candidate.getValuesList();
          boolean matches = actual.size() == parts.size();
          // Stop at the first position that disagrees with a non-empty constraint.
          for (int pos = 0; matches && pos < actual.size(); pos++) {
            String wanted = parts.get(pos);
            matches = wanted.isEmpty() || actual.get(pos).equals(wanted);
          }
          return matches;
        })
        .map(PartitionInfo::getPartitionName)
        .collect(toImmutableList());

    return Optional.of(matchingNames);
  } catch (AlluxioStatusException e) {
    throw new PrestoException(HIVE_METASTORE_ERROR, e);
  }
}
use of alluxio.grpc.table.layout.hive.PartitionInfo in project presto by prestodb.
the class AlluxioHiveMetastore method getPartitionsByNames.
/**
 * Looks up the partitions of a table by partition name.
 *
 * <p>All partitions of the table are read from Alluxio and then narrowed to those whose name is
 * in {@code partitionNames}. Requested names that match no partition do not appear in the
 * returned map.
 *
 * @param metastoreContext metastore context (not read by this method)
 * @param databaseName database name
 * @param tableName table name
 * @param partitionNames names of the partitions to return
 * @return map from partition name to the converted partition; empty when
 *         {@code partitionNames} is empty
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if Alluxio reports a status error
 */
@Override
public Map<String, Optional<Partition>> getPartitionsByNames(MetastoreContext metastoreContext, String databaseName, String tableName, List<String> partitionNames) {
  if (partitionNames.isEmpty()) {
    return ImmutableMap.of();
  }
  // Index the requested names once so each partition is matched with a hash lookup instead of
  // rescanning the whole request list per partition.
  final java.util.Set<String> requestedNames = new java.util.HashSet<>(partitionNames);
  try {
    // Get all partitions
    List<PartitionInfo> partitionInfos = AlluxioProtoUtils.toPartitionInfoList(
        client.readTable(databaseName, tableName, Constraint.getDefaultInstance()));
    return partitionInfos.stream()
        // TODO also check for database name equality
        .filter(p -> p.getTableName().equals(tableName))
        .filter(p -> requestedNames.contains(p.getPartitionName()))
        .collect(toImmutableMap(
            PartitionInfo::getPartitionName,
            partitionInfo -> Optional.of(AlluxioProtoUtils.fromProto(partitionInfo))));
  } catch (AlluxioStatusException e) {
    throw new PrestoException(HIVE_METASTORE_ERROR, e);
  }
}
use of alluxio.grpc.table.layout.hive.PartitionInfo in project presto by prestodb.
the class AlluxioProtoUtils method fromProto.
/**
 * Converts an Alluxio {@code TableInfo} into a Presto metastore {@code Table}.
 *
 * <p>Only tables carrying a Hive layout are supported. Data columns are the schema columns whose
 * names are not partition column names; storage settings come from the Hive layout's storage.
 *
 * @param table the Alluxio table description
 * @return the equivalent Presto table, with table type {@code PrestoTableType.OTHER}
 * @throws UnsupportedOperationException if the table has no layout or the layout is not Hive
 * @throws IllegalArgumentException if the layout data cannot be parsed as partition info
 */
public static Table fromProto(alluxio.grpc.table.TableInfo table) {
  if (!table.hasLayout()) {
    throw new UnsupportedOperationException("Unsupported table metadata. missing layout.");
  }
  final Layout layout = table.getLayout();
  if (!alluxio.table.ProtoUtils.isHiveLayout(layout)) {
    throw new UnsupportedOperationException("Unsupported table layout: " + layout);
  }
  try {
    final PartitionInfo hiveLayout = alluxio.table.ProtoUtils.toHiveLayout(layout);

    // compute the data columns: every schema column that is not a partition column
    final Set<String> partitionColumnNames = table.getPartitionColsList().stream()
        .map(FieldSchema::getName)
        .collect(toImmutableSet());

    final Table.Builder tableBuilder = Table.builder()
        .setDatabaseName(table.getDbName())
        .setTableName(table.getTableName())
        .setOwner(table.getOwner())
        .setTableType(PrestoTableType.OTHER)
        .setDataColumns(table.getSchema().getColsList().stream()
            .filter(column -> !partitionColumnNames.contains(column.getName()))
            .map(AlluxioProtoUtils::fromProto)
            .collect(toImmutableList()))
        .setPartitionColumns(table.getPartitionColsList().stream()
            .map(AlluxioProtoUtils::fromProto)
            .collect(toImmutableList()))
        .setParameters(table.getParametersMap())
        .setViewOriginalText(Optional.empty())
        .setViewExpandedText(Optional.empty());

    final alluxio.grpc.table.layout.hive.Storage storage = hiveLayout.getStorage();
    // TODO: We should also set storage parameters here when they are available in alluxio.grpc.table.layout.hive.Storage
    tableBuilder.getStorageBuilder()
        .setSkewed(storage.getSkewed())
        .setStorageFormat(fromProto(storage.getStorageFormat()))
        .setLocation(storage.getLocation())
        .setBucketProperty(storage.hasBucketProperty() ? fromProto(storage.getBucketProperty()) : Optional.empty())
        .setSerdeParameters(storage.getStorageFormat().getSerdelibParametersMap());

    return tableBuilder.build();
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException("Failed to extract PartitionInfo from TableInfo", e);
  }
}
use of alluxio.grpc.table.layout.hive.PartitionInfo in project alluxio by Alluxio.
the class GlueDatabase method getTable.
/**
 * Loads a table from AWS Glue, mounts its locations into Alluxio, and wraps the table, its
 * partitions and any enabled column statistics in a {@link GlueTable}.
 *
 * <p>An unpartitioned table is exposed as a single partition named after the table. Table-level
 * and partition-level column statistics are fetched only when the corresponding configuration
 * properties are enabled.
 *
 * @param tableName name of the Glue table in this database
 * @param bypassSpec bypass specification forwarded to the path mounting step
 * @return the table with its partitions, layout and statistics
 * @throws NotFoundException if Glue reports that the table does not exist
 * @throws IOException if Glue reports a validation or encryption error
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
  try {
    GetTableRequest tableRequest = new GetTableRequest()
        .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
        .withDatabaseName(mGlueDbName)
        .withName(tableName);
    final Table table = getClient().getTable(tableRequest).getTable();
    final List<Partition> partitions = batchGetPartitions(getClient(), tableName);
    PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

    // Partition keys may be null; normalize once so every later use is null-safe.
    final List<Column> partitionColumns =
        table.getPartitionKeys() == null ? Collections.emptyList() : table.getPartitionKeys();

    // Get table parameters
    Map<String, String> tableParameters =
        table.getParameters() == null ? Collections.emptyMap() : table.getParameters();

    // Get column statistics info for table
    List<String> columnNames = table.getStorageDescriptor().getColumns().stream()
        .map(Column::getName)
        .collect(Collectors.toList());
    List<ColumnStatisticsInfo> columnStatisticsTableData = new ArrayList<>();
    if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
      // The request is only needed when table statistics are enabled.
      GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest =
          new GetColumnStatisticsForTableRequest()
              .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
              .withDatabaseName(mGlueDbName)
              .withTableName(tableName)
              .withColumnNames(columnNames);
      columnStatisticsTableData =
          getTableColumnStatistics(mGlueDbName, tableName, getColumnStatisticsForTableRequest);
    }

    // Get column statistics info for partitions
    // potential expensive call
    Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
    if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
      for (Partition partition : partitions) {
        List<String> partitionValue = partition.getValues();
        if (partitionValue != null) {
          GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest =
              new GetColumnStatisticsForPartitionRequest()
                  .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                  .withDatabaseName(mGlueDbName)
                  .withTableName(tableName)
                  .withColumnNames(columnNames)
                  .withPartitionValues(partitionValue);
          String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
          statsMap.put(partName, getPartitionColumnStatistics(mGlueDbName, tableName, getColumnStatisticsForPartitionRequest));
        }
      }
    }

    // Table-level partition info: serialized into the table layout and, for unpartitioned
    // tables, reused below as the table's single partition.
    PartitionInfo partitionInfo = PartitionInfo.newBuilder()
        .setDbName(mGlueDbName)
        .setTableName(tableName)
        .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
        .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
        .putAllParameters(tableParameters)
        .build();
    Layout layout = Layout.newBuilder()
        .setLayoutType(HiveLayout.TYPE)
        .setLayoutData(partitionInfo.toByteString())
        .build();

    List<UdbPartition> udbPartitions = new ArrayList<>();
    if (partitionColumns.isEmpty()) {
      // Unpartitioned table: expose one partition that is named after the table.
      PartitionInfo.Builder partitionInfoBuilder =
          partitionInfo.toBuilder().setPartitionName(tableName);
      udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(), Collections.emptyList())));
    } else {
      for (Partition partition : partitions) {
        // NOTE(review): the statistics loop above skips partitions whose values are null, but
        // here the name is computed before the null check below — confirm that
        // GlueUtils.makePartitionName tolerates null values.
        String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
        PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
            .setPartitionName(partName)
            .putAllParameters(partition.getParameters() == null ? Collections.emptyMap() : partition.getParameters());
        if (partition.getValues() != null) {
          partitionInfoBuilder.addAllValues(partition.getValues());
        }
        udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(), statsMap.getOrDefault(partName, Collections.emptyList()))));
      }
    }

    return new GlueTable(this, pathTranslator, tableName,
        GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
        columnStatisticsTableData,
        // Get FieldSchema from partition keys (null-normalized above)
        GlueUtils.toProto(partitionColumns),
        udbPartitions, layout, table);
  } catch (EntityNotFoundException e) {
    throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
  } catch (ValidationException e) {
    throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
  } catch (GlueEncryptionException e) {
    throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
  }
}
Aggregations