use of com.amazonaws.services.glue.model.Partition in project alluxio by Alluxio.
the class GlueDatabase method batchGetPartitions.
/**
 * Fetches all partitions of a Glue table, following the pagination token until Glue stops
 * returning one.
 *
 * @param glueClient the Glue client used to issue the {@code getPartitions} requests
 * @param tableName the name of the table inside the database {@code mGlueDbName}
 * @return the accumulated partitions; never {@code null}, empty if no partition was returned
 * @throws IOException if Glue raises an {@link AWSGlueException} while fetching
 */
private List<Partition> batchGetPartitions(AWSGlueAsync glueClient, String tableName)
    throws IOException {
  // TODO(shouwei): make getPartition multi-thread to accelerate the large table fetching
  List<Partition> partitions = new ArrayList<>();
  String nextToken = null;
  try {
    do {
      GetPartitionsRequest getPartitionsRequest = new GetPartitionsRequest()
          .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
          .withDatabaseName(mGlueDbName)
          .withTableName(tableName)
          .withMaxResults(mGlueConfiguration.getInt(Property.MAX_GLUE_FETCH_PARTITIONS))
          .withNextToken(nextToken);
      GetPartitionsResult getPartitionsResult = glueClient.getPartitions(getPartitionsRequest);
      // The SDK model getter hands back its list field as-is, so a page without partitions may
      // surface as null rather than an empty list; guard before accumulating.
      List<Partition> batch = getPartitionsResult.getPartitions();
      int batchSize = 0;
      if (batch != null) {
        partitions.addAll(batch);
        batchSize = batch.size();
      }
      nextToken = getPartitionsResult.getNextToken();
      LOG.debug("Glue table {}.{} adding {} batch partitions with total {} partitions.",
          mGlueDbName, tableName, batchSize, partitions.size());
    } while (nextToken != null);

    LOG.info("Glue table {}.{} has {} partitions.", mGlueDbName, tableName, partitions.size());
    // Only walk the (potentially large) partition list when debug output is actually emitted.
    if (LOG.isDebugEnabled()) {
      for (Partition partition : partitions) {
        LOG.debug("Glue table {}.{} with partition: {}.",
            partition.getDatabaseName(), tableName, partition);
      }
    }
    return partitions;
  } catch (AWSGlueException e) {
    throw new IOException("Cannot get partition information for table: " + tableName
        + " in Database: " + mGlueDbName
        + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID)
        + ". error: " + e.getMessage(), e);
  }
}
use of com.amazonaws.services.glue.model.Partition in project alluxio by Alluxio.
the class GlueDatabase method mountAlluxioPaths.
/**
 * Builds the {@link PathTranslator} for a Glue table by mounting its location, and the
 * locations of its partitions that lie under the table location, via
 * {@code UdbUtils.mountAlluxioPath}.
 *
 * <p>If the bypass spec covers the whole table, nothing is mounted and the table location is
 * mapped to itself. A partition covered by the bypass spec is likewise mapped to itself (using
 * the path component of its URI) instead of being mounted. Partitions without a storage
 * descriptor or location, or whose location is not under the table location according to
 * {@code AlluxioURI.isAncestorOf}, are skipped.
 *
 * @param table the Glue table whose storage location is mounted
 * @param partitions the partitions of {@code table}
 * @param bypassSpec tells which tables and partitions must not be mounted
 * @return the translator holding one mapping per mounted or bypassed location
 * @throws IOException if mounting fails with an {@link AlluxioException}
 */
@VisibleForTesting
private PathTranslator mountAlluxioPaths(Table table, List<Partition> partitions,
    UdbBypassSpec bypassSpec) throws IOException {
  String tableName = table.getName();
  // alluxioUri and glueUfsUri are updated as the loop advances so that the error message in the
  // catch block reports the location that was being mounted when the failure happened.
  AlluxioURI alluxioUri = mUdbContext.getTableLocation(tableName);
  String glueUfsUri = table.getStorageDescriptor().getLocation();
  try {
    PathTranslator pathTranslator = new PathTranslator();
    if (bypassSpec.hasFullTable(tableName)) {
      pathTranslator.addMapping(glueUfsUri, glueUfsUri);
      return pathTranslator;
    }
    AlluxioURI ufsUri = new AlluxioURI(glueUfsUri);
    pathTranslator.addMapping(
        UdbUtils.mountAlluxioPath(tableName, ufsUri, alluxioUri, mUdbContext, mGlueConfiguration),
        glueUfsUri);

    for (Partition partition : partitions) {
      if (partition.getStorageDescriptor() == null
          || partition.getStorageDescriptor().getLocation() == null) {
        continue;
      }
      AlluxioURI partitionUri = new AlluxioURI(partition.getStorageDescriptor().getLocation());
      if (!ufsUri.isAncestorOf(partitionUri)) {
        continue;
      }
      glueUfsUri = partition.getStorageDescriptor().getLocation();

      // Fall back to the raw value list if a proper partition name cannot be built.
      String partitionName = partition.getValues().toString();
      try {
        partitionName = GlueUtils.makePartitionName(table.getPartitionKeys(),
            partition.getValues());
      } catch (IOException e) {
        // Keep going with the fallback name, but log the cause so the failure can be diagnosed.
        LOG.warn("Error making partition name for table {},"
            + " partition {} in database {} with CatalogID {}.",
            tableName, partition.getValues().toString(), mGlueDbName,
            mGlueConfiguration.get(Property.CATALOG_ID), e);
      }

      if (bypassSpec.hasPartition(tableName, partitionName)) {
        pathTranslator.addMapping(partitionUri.getPath(), partitionUri.getPath());
        continue;
      }
      alluxioUri = new AlluxioURI(
          PathUtils.concatPath(mUdbContext.getTableLocation(tableName).getPath(), partitionName));
      // mount partition path if it is not already mounted as part of the table path mount
      pathTranslator.addMapping(
          UdbUtils.mountAlluxioPath(tableName, partitionUri, alluxioUri, mUdbContext,
              mGlueConfiguration),
          glueUfsUri);
    }
    return pathTranslator;
  } catch (AlluxioException e) {
    throw new IOException("Failed to mount table location. tableName: " + tableName
        + " glueUfsLocation: " + glueUfsUri
        + " AlluxioLocation: " + alluxioUri
        + " error: " + e.getMessage(), e);
  }
}
use of com.amazonaws.services.glue.model.Partition in project presto by prestodb.
the class TestGlueToPrestoConverter method testPartitionConversionMemoization.
/**
 * Converts two equal-but-distinct Glue partitions with a single {@code GluePartitionConverter}
 * and checks which parts of the results are the very same instances.
 *
 * <p>Expected to be shared: database name, table name, columns, parameters, storage format,
 * bucket property and serde parameters. Expected to be distinct: the converted partitions
 * themselves, their value lists and their storage locations.
 */
@Test
public void testPartitionConversionMemoization() {
    // Deliberately not final: "" + location below must produce a fresh String instance.
    String location = "s3://some-fake-location";
    testPartition.getStorageDescriptor().setLocation(location);

    // A second input partition whose values are equal to, but not aliases of, the first one's
    Partition equalPartition = getGlueTestPartition(
            testPartition.getDatabaseName(),
            testPartition.getTableName(),
            new ArrayList<>(testPartition.getValues()));

    // Copy every storage field so it is equal to, but never the same object as, the original
    partitionTwoStorageCopy:
    {
        equalPartition.getStorageDescriptor().setLocation("" + location);
        equalPartition.getStorageDescriptor().setInputFormat(
                "" + testPartition.getStorageDescriptor().getInputFormat());
        equalPartition.getStorageDescriptor().setOutputFormat(
                "" + testPartition.getStorageDescriptor().getOutputFormat());
        equalPartition.getStorageDescriptor().setColumns(
                new ArrayList<>(testPartition.getStorageDescriptor().getColumns()));
        equalPartition.getStorageDescriptor().setBucketColumns(
                new ArrayList<>(testPartition.getStorageDescriptor().getBucketColumns()));
        equalPartition.getStorageDescriptor().setParameters(
                new HashMap<>(testPartition.getStorageDescriptor().getParameters()));
    }

    // Both conversions go through the same converter instance, original partition first
    GluePartitionConverter converter = new GluePartitionConverter(testDb.getName(), testTbl.getName());
    com.facebook.presto.hive.metastore.Partition converted = converter.apply(testPartition);
    com.facebook.presto.hive.metastore.Partition convertedCopy = converter.apply(equalPartition);

    // Partition-level expectations
    assertNotSame(converted, convertedCopy);
    assertSame(convertedCopy.getDatabaseName(), converted.getDatabaseName());
    assertSame(convertedCopy.getTableName(), converted.getTableName());
    assertSame(convertedCopy.getColumns(), converted.getColumns());
    assertSame(convertedCopy.getParameters(), converted.getParameters());
    assertNotSame(convertedCopy.getValues(), converted.getValues());

    // Storage-level expectations
    Storage firstStorage = converted.getStorage();
    Storage secondStorage = convertedCopy.getStorage();
    assertSame(secondStorage.getStorageFormat(), firstStorage.getStorageFormat());
    assertSame(secondStorage.getBucketProperty(), firstStorage.getBucketProperty());
    assertSame(secondStorage.getSerdeParameters(), firstStorage.getSerdeParameters());
    assertNotSame(secondStorage.getLocation(), firstStorage.getLocation());
}
use of com.amazonaws.services.glue.model.Partition in project alluxio by Alluxio.
the class GlueDatabase method getTable.
/**
 * Loads a table from Glue and assembles it, together with its partitions, into a
 * {@link UdbTable}.
 *
 * <p>Steps, in order: fetch the table, fetch all of its partitions, mount the locations and
 * obtain the {@link PathTranslator}, optionally fetch table-level and partition-level column
 * statistics (each gated by its own configuration property), then build the layout and one
 * {@code GluePartition} per Glue partition. A table without partition keys is represented by a
 * single partition named after the table.
 *
 * @param tableName the name of the table inside the database {@code mGlueDbName}
 * @param bypassSpec tells which tables and partitions must not be mounted
 * @return the assembled table
 * @throws NotFoundException if Glue reports that the table does not exist
 * @throws IOException if Glue reports a validation or encryption error, or if a step fails
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
  try {
    GetTableRequest tableRequest = new GetTableRequest()
        .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
        .withDatabaseName(mGlueDbName)
        .withName(tableName);
    Table table = getClient().getTable(tableRequest).getTable();
    List<Partition> partitions = batchGetPartitions(getClient(), tableName);
    PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

    // Treat a missing partition key list as "not partitioned"
    List<Column> partitionColumns = Collections.emptyList();
    if (table.getPartitionKeys() != null) {
      partitionColumns = table.getPartitionKeys();
    }

    // Get table parameters
    Map<String, String> tableParameters = Collections.emptyMap();
    if (table.getParameters() != null) {
      tableParameters = table.getParameters();
    }

    // Get column statistics info for table
    List<String> columnNames = table.getStorageDescriptor().getColumns().stream()
        .map(Column::getName)
        .collect(Collectors.toList());
    List<ColumnStatisticsInfo> columnStatisticsTableData;
    if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
      GetColumnStatisticsForTableRequest tableStatsRequest =
          new GetColumnStatisticsForTableRequest()
              .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
              .withDatabaseName(mGlueDbName)
              .withTableName(tableName)
              .withColumnNames(columnNames);
      columnStatisticsTableData =
          getTableColumnStatistics(mGlueDbName, tableName, tableStatsRequest);
    } else {
      columnStatisticsTableData = new ArrayList<>();
    }

    // Get column statistics info for partitions
    // potential expensive call
    Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
    if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
      for (Partition partition : partitions) {
        List<String> partitionValue = partition.getValues();
        if (partitionValue == null) {
          continue;
        }
        GetColumnStatisticsForPartitionRequest partitionStatsRequest =
            new GetColumnStatisticsForPartitionRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withTableName(tableName)
                .withColumnNames(columnNames)
                .withPartitionValues(partitionValue);
        String statsKey = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
        statsMap.put(statsKey,
            getPartitionColumnStatistics(mGlueDbName, tableName, partitionStatsRequest));
      }
    }

    // Table-level layout, serialized from the table's own storage descriptor
    PartitionInfo partitionInfo = PartitionInfo.newBuilder()
        .setDbName(mGlueDbName)
        .setTableName(tableName)
        .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
        .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
        .putAllParameters(tableParameters)
        .build();
    Layout layout = Layout.newBuilder()
        .setLayoutType(HiveLayout.TYPE)
        .setLayoutData(partitionInfo.toByteString())
        .build();

    List<UdbPartition> udbPartitions = new ArrayList<>();
    if (partitionColumns.isEmpty()) {
      // Unpartitioned table: expose the table itself as its only partition, without statistics
      PartitionInfo wholeTableInfo = PartitionInfo.newBuilder()
          .setDbName(mGlueDbName)
          .setTableName(tableName)
          .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
          .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
          .setPartitionName(tableName)
          .putAllParameters(tableParameters)
          .build();
      udbPartitions.add(new GluePartition(new HiveLayout(wholeTableInfo, Collections.emptyList())));
    } else {
      for (Partition partition : partitions) {
        String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
        Map<String, String> partitionParameters = Collections.emptyMap();
        if (partition.getParameters() != null) {
          partitionParameters = partition.getParameters();
        }
        PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
            .setPartitionName(partName)
            .putAllParameters(partitionParameters);
        if (partition.getValues() != null) {
          partitionInfoBuilder.addAllValues(partition.getValues());
        }
        // Statistics are only present when partition statistics were fetched above
        List<ColumnStatisticsInfo> partitionStats =
            statsMap.getOrDefault(partName, Collections.emptyList());
        udbPartitions.add(
            new GluePartition(new HiveLayout(partitionInfoBuilder.build(), partitionStats)));
      }
    }

    return new GlueTable(
        this,
        pathTranslator,
        tableName,
        GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
        columnStatisticsTableData,
        // Get FieldSchema from partition keys
        GlueUtils.toProto(table.getPartitionKeys()),
        udbPartitions,
        layout,
        table);
  } catch (EntityNotFoundException e) {
    throw new NotFoundException("Table " + tableName
        + " does not exist in Database: " + mGlueDbName
        + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
  } catch (ValidationException e) {
    throw new IOException("Failed to get table: " + tableName
        + " in Database: " + mGlueDbName
        + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID)
        + " with validation error: " + e.getMessage(), e);
  } catch (GlueEncryptionException e) {
    throw new IOException("Failed to get table: " + tableName
        + " in Database: " + mGlueDbName
        + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID)
        + " error: " + e.getMessage(), e);
  }
}
Aggregations