use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.
the class DynamoDBMetadataHandler method getPartitions.
/**
 * Writes the partition rows for a table layout request.
 * <p>
 * If the constraint summary yields hash key values for the index selected by
 * {@code DDBPredicateUtils.getBestIndexForPredicates}, one partition row is written per hash key value.
 * Otherwise a single partition row is written that carries the scan segment count computed by
 * {@code DDBTableUtils.getNumSegments} from the table's provisioned read capacity and approximate size.
 *
 * @see GlueMetadataHandler
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    // TODO consider caching this repeated work in #enhancePartitionSchema
    // Prefer the source table name recorded in the schema (the Glue table name may differ from the
    // actual table name); only fall back to the requested table name when the schema has none.
    String sourceTableName = getSourceTableName(request.getSchema());
    String tableName = (sourceTableName != null)
            ? sourceTableName
            : request.getTableName().getTableName();

    DynamoDBTable table = tableResolver.getTableMetadata(tableName);
    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = request.getSchema().getFields()
            .stream()
            .map(Field::getName)
            .collect(Collectors.toList());

    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());

    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues;
    if (hashKeyValueSet == null) {
        hashKeyValues = Collections.emptyList();
    } else {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet);
    }

    if (hashKeyValues.isEmpty()) {
        // No usable hash key values: fall back to a scan. At least one partition must be returned,
        // so the segment count is stored in that single partition row.
        int segmentCount = DDBTableUtils.getNumSegments(table.getProvisionedReadCapacity(), table.getApproxTableSizeInBytes());
        blockWriter.writeRows((Block partitionBlock, int row) -> {
            partitionBlock.setValue(SEGMENT_COUNT_METADATA, row, segmentCount);
            return 1;
        });
        return;
    }

    for (Object hashKeyValue : hashKeyValues) {
        blockWriter.writeRows((Block partitionBlock, int row) -> {
            partitionBlock.setValue(hashKeyName, row, hashKeyValue);
            // exactly one partition row is written per hash key value
            return 1;
        });
    }
}
use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.
the class DDBTableUtils method getTable.
/**
 * Describes a DynamoDB table and converts the result into a {@code DynamoDBTable}.
 * <p>
 * The returned metadata holds the table's key names, attribute definitions, size and item count as
 * reported by the describe call, the read capacity, and every local secondary index plus every global
 * secondary index whose status is {@code ACTIVE}.
 *
 * @param tableName the (case sensitive) table name
 * @param invoker the ThrottlingInvoker to call DDB with
 * @param ddbClient the DDB client to use
 * @return the table metadata
 */
public static DynamoDBTable getTable(String tableName, ThrottlingInvoker invoker, AmazonDynamoDB ddbClient) throws TimeoutException {
    DescribeTableRequest describeRequest = new DescribeTableRequest().withTableName(tableName);
    TableDescription description = invoker.invoke(() -> ddbClient.describeTable(describeRequest).getTable());
    KeyNames tableKeys = getKeys(description.getKeySchema());

    // data statistics
    long approxTableSizeInBytes = description.getTableSizeBytes();
    long approxItemCount = description.getItemCount();
    final long provisionedReadCapacity;
    if (description.getProvisionedThroughput() != null) {
        provisionedReadCapacity = description.getProvisionedThroughput().getReadCapacityUnits();
    } else {
        // no provisioned throughput on the description: use the on-demand stand-in value
        provisionedReadCapacity = PSUEDO_CAPACITY_FOR_ON_DEMAND;
    }

    // secondary indexes
    ImmutableList.Builder<DynamoDBIndex> indexBuilder = ImmutableList.builder();

    List<LocalSecondaryIndexDescription> localIndexes = description.getLocalSecondaryIndexes();
    List<GlobalSecondaryIndexDescription> globalIndexes = description.getGlobalSecondaryIndexes();

    if (localIndexes != null) {
        for (LocalSecondaryIndexDescription lsi : localIndexes) {
            KeyNames lsiKeys = getKeys(lsi.getKeySchema());
            // DynamoDB automatically fetches all attributes from the table for local secondary index, so ignore projected attributes
            indexBuilder.add(new DynamoDBIndex(lsi.getIndexName(), lsiKeys.getHashKey(), lsiKeys.getRangeKey(), ProjectionType.ALL, ImmutableList.of()));
        }
    }

    if (globalIndexes != null) {
        for (GlobalSecondaryIndexDescription gsi : globalIndexes) {
            // only global secondary indexes in ACTIVE status are included
            if (!IndexStatus.fromValue(gsi.getIndexStatus()).equals(IndexStatus.ACTIVE)) {
                continue;
            }
            KeyNames gsiKeys = getKeys(gsi.getKeySchema());
            ProjectionType projectionType = ProjectionType.fromValue(gsi.getProjection().getProjectionType());
            List<String> nonKeyAttributes = gsi.getProjection().getNonKeyAttributes();
            if (nonKeyAttributes == null) {
                nonKeyAttributes = ImmutableList.of();
            }
            indexBuilder.add(new DynamoDBIndex(gsi.getIndexName(), gsiKeys.getHashKey(), gsiKeys.getRangeKey(), projectionType, nonKeyAttributes));
        }
    }

    return new DynamoDBTable(
            tableName,
            tableKeys.getHashKey(),
            tableKeys.getRangeKey(),
            description.getAttributeDefinitions(),
            indexBuilder.build(),
            approxTableSizeInBytes,
            approxItemCount,
            provisionedReadCapacity);
}
use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithGSIProjectionTypeInclude.
/**
 * Checks index selection for a table with one global secondary index ("col0-gsi", hash key "col0")
 * using the INCLUDE projection type with "col1" as its only non-key attribute.
 */
@Test
public void testGetBestIndexForPredicatesWithGSIProjectionTypeInclude() {
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));

    DynamoDBIndex includeGsi = new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col1"));
    DynamoDBTable table = new DynamoDBTable(
            "tableName",
            "hashKey",
            Optional.of("sortKey"),
            ImmutableList.of(
                    new AttributeDefinition("hashKey", "S"),
                    new AttributeDefinition("sortKey", "S"),
                    new AttributeDefinition("col0", "S")),
            ImmutableList.of(includeGsi),
            1000,
            10,
            5);

    // "col2" is requested here, which the index definition above does not list
    ImmutableList<String> columnsIncludingCol2 = ImmutableList.of("hashKey", "sortKey", "col0", "col1", "col2");
    // every column requested here is a table key, the index key, or the included "col1"
    ImmutableList<String> columnsUpToCol1 = ImmutableList.of("hashKey", "sortKey", "col0", "col1");

    DynamoDBIndex chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("hashKey", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("sortKey", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("col3", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("col0", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsUpToCol1, ImmutableMap.of("col0", equalsValue));
    assertEquals("col0-gsi", chosen.getName());
}
use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithGSIProjectionTypeKeysOnly.
/**
 * Checks index selection for a table with one global secondary index ("col0-gsi", hash key "col0")
 * using the KEYS_ONLY projection type.
 */
@Test
public void testGetBestIndexForPredicatesWithGSIProjectionTypeKeysOnly() {
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));

    DynamoDBIndex keysOnlyGsi = new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.KEYS_ONLY, ImmutableList.of());
    DynamoDBTable table = new DynamoDBTable(
            "tableName",
            "hashKey",
            Optional.of("sortKey"),
            ImmutableList.of(
                    new AttributeDefinition("hashKey", "S"),
                    new AttributeDefinition("sortKey", "S"),
                    new AttributeDefinition("col0", "S")),
            ImmutableList.of(keysOnlyGsi),
            1000,
            10,
            5);

    // requests the non-key column "col1" in addition to the key columns
    ImmutableList<String> columnsWithCol1 = ImmutableList.of("hashKey", "sortKey", "col0", "col1");
    // requests only the table keys and the index hash key
    ImmutableList<String> keyColumnsOnly = ImmutableList.of("hashKey", "sortKey", "col0");

    DynamoDBIndex chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsWithCol1, ImmutableMap.of("hashKey", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, columnsWithCol1, ImmutableMap.of("col0", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, keyColumnsOnly, ImmutableMap.of("col0", equalsValue));
    assertEquals("col0-gsi", chosen.getName());
}
use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithLSI.
/**
 * Checks index selection for a table with one local secondary index ("col0-lsi") that shares the
 * table's hash key and uses "col0" as its range key.
 */
@Test
public void testGetBestIndexForPredicatesWithLSI() {
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));

    DynamoDBIndex localIndex = new DynamoDBIndex("col0-lsi", "hashKey", Optional.of("col0"), ProjectionType.ALL, ImmutableList.of());
    DynamoDBTable table = new DynamoDBTable(
            "tableName",
            "hashKey",
            Optional.of("sortKey"),
            ImmutableList.of(
                    new AttributeDefinition("hashKey", "S"),
                    new AttributeDefinition("sortKey", "S"),
                    new AttributeDefinition("col0", "S")),
            ImmutableList.of(localIndex),
            1000,
            10,
            5);

    // the same column list is requested in every case; only the predicates vary
    ImmutableList<String> requestedColumns = ImmutableList.of("hashKey", "sortKey", "col0", "col1");

    DynamoDBIndex chosen = DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue, "sortKey", equalsValue));
    assertEquals("tableName", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue, "col0", equalsValue));
    assertEquals("col0-lsi", chosen.getName());

    chosen = DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("col0", equalsValue));
    assertEquals("tableName", chosen.getName());
}
Aggregations