Search in sources:

Example 6 with DynamoDBTable

use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandler method getPartitions.

/**
 * Writes the partitions for a table layout request.
 *
 * <p>If the constraint summary yields hash key values for the index selected by
 * {@code DDBPredicateUtils.getBestIndexForPredicates}, one partition row is written per hash key
 * value. Otherwise a single partition row is written that carries the heuristically determined
 * scan segment count.
 *
 * @param blockWriter receives the partition rows
 * @param request supplies the schema, table name and constraints used to pick the partitions
 * @param queryStatusChecker not consulted by this implementation
 * @see GlueMetadataHandler
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    // TODO consider caching this repeated work in #enhancePartitionSchema
    // prefer the source table name recorded in the schema (the Glue table name may differ from the
    // actual table name); otherwise use the table name from the request
    String sourceTableName = getSourceTableName(request.getSchema());
    String resolvedTableName = (sourceTableName != null) ? sourceTableName : request.getTableName().getTableName();
    DynamoDBTable tableMetadata = tableResolver.getTableMetadata(resolvedTableName);

    Map<String, ValueSet> constraintSummary = request.getConstraints().getSummary();
    List<String> columnNames = request.getSchema().getFields().stream().map(Field::getName).collect(Collectors.toList());
    DynamoDBIndex chosenIndex = DDBPredicateUtils.getBestIndexForPredicates(tableMetadata, columnNames, constraintSummary);
    logger.info("using index: {}", chosenIndex.getName());

    String hashKeyName = chosenIndex.getHashKey();
    ValueSet hashKeyConstraint = constraintSummary.get(hashKeyName);
    List<Object> hashKeyValues;
    if (hashKeyConstraint == null) {
        // no constraint on the hash key, so there are no hash key values to partition on
        hashKeyValues = Collections.emptyList();
    } else {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyConstraint);
    }

    if (hashKeyValues.isEmpty()) {
        // always fall back to a scan, need to return at least one partition so stick the segment count in it
        int segmentCount = DDBTableUtils.getNumSegments(tableMetadata.getProvisionedReadCapacity(), tableMetadata.getApproxTableSizeInBytes());
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(SEGMENT_COUNT_METADATA, rowNum, segmentCount);
            return 1;
        });
        return;
    }

    for (Object value : hashKeyValues) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(hashKeyName, rowNum, value);
            // exactly one partition row is added for each hash key value
            return 1;
        });
    }
}
Also used : DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable)

Example 7 with DynamoDBTable

use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.

the class DDBTableUtils method getTable.

/**
 * Describes a DynamoDB table and packages the result as a {@code DynamoDBTable}.
 *
 * <p>The returned metadata holds the table's key names, attribute definitions, approximate size
 * and item count, read capacity, and its secondary indexes. Every local secondary index is
 * included; a global secondary index is included only when its status is {@code ACTIVE}.
 *
 * @param tableName the (case sensitive) table name
 * @param invoker the ThrottlingInvoker to call DDB with
 * @param ddbClient the DDB client to use
 * @return the table metadata
 * @throws TimeoutException declared by this method; presumably raised by the invoker call (confirm)
 */
public static DynamoDBTable getTable(String tableName, ThrottlingInvoker invoker, AmazonDynamoDB ddbClient) throws TimeoutException {
    DescribeTableRequest describeRequest = new DescribeTableRequest().withTableName(tableName);
    TableDescription description = invoker.invoke(() -> ddbClient.describeTable(describeRequest).getTable());
    KeyNames tableKeys = getKeys(description.getKeySchema());

    // table statistics as reported by the describe call
    long sizeInBytes = description.getTableSizeBytes();
    long itemCount = description.getItemCount();
    final long readCapacity;
    if (description.getProvisionedThroughput() != null) {
        readCapacity = description.getProvisionedThroughput().getReadCapacityUnits();
    } else {
        // no provisioned throughput reported, so use the on-demand placeholder capacity
        readCapacity = PSUEDO_CAPACITY_FOR_ON_DEMAND;
    }

    // treat a missing index list as empty
    List<LocalSecondaryIndexDescription> localIndexes = description.getLocalSecondaryIndexes() != null ? description.getLocalSecondaryIndexes() : ImmutableList.of();
    List<GlobalSecondaryIndexDescription> globalIndexes = description.getGlobalSecondaryIndexes() != null ? description.getGlobalSecondaryIndexes() : ImmutableList.of();

    ImmutableList.Builder<DynamoDBIndex> indexBuilder = ImmutableList.builder();
    for (LocalSecondaryIndexDescription lsi : localIndexes) {
        KeyNames lsiKeys = getKeys(lsi.getKeySchema());
        // DynamoDB automatically fetches all attributes from the table for local secondary index, so ignore projected attributes
        indexBuilder.add(new DynamoDBIndex(lsi.getIndexName(), lsiKeys.getHashKey(), lsiKeys.getRangeKey(), ProjectionType.ALL, ImmutableList.of()));
    }
    for (GlobalSecondaryIndexDescription gsi : globalIndexes) {
        // skip any global secondary index that is not ACTIVE
        if (!IndexStatus.fromValue(gsi.getIndexStatus()).equals(IndexStatus.ACTIVE)) {
            continue;
        }
        KeyNames gsiKeys = getKeys(gsi.getKeySchema());
        indexBuilder.add(new DynamoDBIndex(
                gsi.getIndexName(),
                gsiKeys.getHashKey(),
                gsiKeys.getRangeKey(),
                ProjectionType.fromValue(gsi.getProjection().getProjectionType()),
                gsi.getProjection().getNonKeyAttributes() == null ? ImmutableList.of() : gsi.getProjection().getNonKeyAttributes()));
    }

    return new DynamoDBTable(
            tableName,
            tableKeys.getHashKey(),
            tableKeys.getRangeKey(),
            description.getAttributeDefinitions(),
            indexBuilder.build(),
            sizeInBytes,
            itemCount,
            readCapacity);
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ThrottlingInvoker(com.amazonaws.athena.connector.lambda.ThrottlingInvoker) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) KeySchemaElement(com.amazonaws.services.dynamodbv2.model.KeySchemaElement) KeyType(com.amazonaws.services.dynamodbv2.model.KeyType) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) IndexStatus(com.amazonaws.services.dynamodbv2.model.IndexStatus) HashSet(java.util.HashSet) LocalSecondaryIndexDescription(com.amazonaws.services.dynamodbv2.model.LocalSecondaryIndexDescription) TableDescription(com.amazonaws.services.dynamodbv2.model.TableDescription) ImmutableList(com.google.common.collect.ImmutableList) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) DescribeTableRequest(com.amazonaws.services.dynamodbv2.model.DescribeTableRequest) GlobalSecondaryIndexDescription(com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndexDescription) ProjectionType(com.amazonaws.services.dynamodbv2.model.ProjectionType) Logger(org.slf4j.Logger) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) Set(java.util.Set) Field(org.apache.arrow.vector.types.pojo.Field) AmazonDynamoDB(com.amazonaws.services.dynamodbv2.AmazonDynamoDB) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) List(java.util.List) Optional(java.util.Optional) ImmutableList(com.google.common.collect.ImmutableList) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) DescribeTableRequest(com.amazonaws.services.dynamodbv2.model.DescribeTableRequest) 
TableDescription(com.amazonaws.services.dynamodbv2.model.TableDescription) GlobalSecondaryIndexDescription(com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndexDescription) LocalSecondaryIndexDescription(com.amazonaws.services.dynamodbv2.model.LocalSecondaryIndexDescription) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable)

Example 8 with DynamoDBTable

use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithGSIProjectionTypeInclude.

/**
 * Checks index selection for a table whose only secondary index is a global secondary index
 * ("col0-gsi", hash key "col0") with INCLUDE projection and "col1" as its included attribute.
 */
@Test
public void testGetBestIndexForPredicatesWithGSIProjectionTypeInclude() {
    // global secondary index with INCLUDE projection type
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));
    ImmutableList<AttributeDefinition> attributeDefinitions = ImmutableList.of(
            new AttributeDefinition("hashKey", "S"),
            new AttributeDefinition("sortKey", "S"),
            new AttributeDefinition("col0", "S"));
    ImmutableList<DynamoDBIndex> secondaryIndexes = ImmutableList.of(
            new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col1")));
    DynamoDBTable table = new DynamoDBTable("tableName", "hashKey", Optional.of("sortKey"), attributeDefinitions, secondaryIndexes, 1000, 10, 5);

    // "col2" is neither a key of the index nor one of its included attributes
    ImmutableList<String> columnsIncludingCol2 = ImmutableList.of("hashKey", "sortKey", "col0", "col1", "col2");
    ImmutableList<String> columnsUpToCol1 = ImmutableList.of("hashKey", "sortKey", "col0", "col1");

    // with "col2" requested, the table itself is expected whichever column the predicate is on
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("hashKey", equalsValue)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("sortKey", equalsValue)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("col3", equalsValue)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol2, ImmutableMap.of("col0", equalsValue)).getName());
    // without "col2", a predicate on the index hash key is expected to select the index
    assertEquals("col0-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, columnsUpToCol1, ImmutableMap.of("col0", equalsValue)).getName());
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) Test(org.junit.Test)

Example 9 with DynamoDBTable

use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithGSIProjectionTypeKeysOnly.

/**
 * Checks index selection for a table whose only secondary index is a global secondary index
 * ("col0-gsi", hash key "col0") with KEYS_ONLY projection and no extra attributes.
 */
@Test
public void testGetBestIndexForPredicatesWithGSIProjectionTypeKeysOnly() {
    // global secondary index with KEYS_ONLY projection type
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));
    ImmutableList<AttributeDefinition> attributeDefinitions = ImmutableList.of(
            new AttributeDefinition("hashKey", "S"),
            new AttributeDefinition("sortKey", "S"),
            new AttributeDefinition("col0", "S"));
    ImmutableList<DynamoDBIndex> secondaryIndexes = ImmutableList.of(
            new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.KEYS_ONLY, ImmutableList.of()));
    DynamoDBTable table = new DynamoDBTable("tableName", "hashKey", Optional.of("sortKey"), attributeDefinitions, secondaryIndexes, 1000, 10, 5);

    // "col1" is not a key attribute of the table or of the index
    ImmutableList<String> columnsIncludingCol1 = ImmutableList.of("hashKey", "sortKey", "col0", "col1");
    ImmutableList<String> keyColumnsOnly = ImmutableList.of("hashKey", "sortKey", "col0");

    // with "col1" requested, the table itself is expected
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol1, ImmutableMap.of("hashKey", equalsValue)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, columnsIncludingCol1, ImmutableMap.of("col0", equalsValue)).getName());
    // with only key columns requested, a predicate on the index hash key is expected to select the index
    assertEquals("col0-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, keyColumnsOnly, ImmutableMap.of("col0", equalsValue)).getName());
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) Test(org.junit.Test)

Example 10 with DynamoDBTable

use of com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithLSI.

/**
 * Checks index selection for a table with one local secondary index ("col0-lsi") that shares the
 * table hash key "hashKey" and uses "col0" as its range key.
 */
@Test
public void testGetBestIndexForPredicatesWithLSI() {
    // local secondary index
    ValueSet equalsValue = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));
    ImmutableList<AttributeDefinition> attributeDefinitions = ImmutableList.of(
            new AttributeDefinition("hashKey", "S"),
            new AttributeDefinition("sortKey", "S"),
            new AttributeDefinition("col0", "S"));
    ImmutableList<DynamoDBIndex> secondaryIndexes = ImmutableList.of(
            new DynamoDBIndex("col0-lsi", "hashKey", Optional.of("col0"), ProjectionType.ALL, ImmutableList.of()));
    DynamoDBTable table = new DynamoDBTable("tableName", "hashKey", Optional.of("sortKey"), attributeDefinitions, secondaryIndexes, 1000, 10, 5);

    // the same columns are requested in every case; only the predicates vary
    ImmutableList<String> requestedColumns = ImmutableList.of("hashKey", "sortKey", "col0", "col1");

    // predicates on the table keys only: the table itself is expected
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue, "sortKey", equalsValue)).getName());
    // predicates on both keys of the local secondary index: the index is expected
    assertEquals("col0-lsi", DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("hashKey", equalsValue, "col0", equalsValue)).getName());
    // predicate on the index range key alone, with no hash key predicate: the table is expected
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, requestedColumns, ImmutableMap.of("col0", equalsValue)).getName());
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) Test(org.junit.Test)

Aggregations

DynamoDBTable (com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable)11 DynamoDBIndex (com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex)10 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)9 AttributeDefinition (com.amazonaws.services.dynamodbv2.model.AttributeDefinition)8 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)6 Test (org.junit.Test)6 AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)4 HashSet (java.util.HashSet)4 Map (java.util.Map)3 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)2 ItemUtils (com.amazonaws.services.dynamodbv2.document.ItemUtils)2 ProjectionType (com.amazonaws.services.dynamodbv2.model.ProjectionType)2 ScanRequest (com.amazonaws.services.dynamodbv2.model.ScanRequest)2 ScanResult (com.amazonaws.services.dynamodbv2.model.ScanResult)2 ImmutableList (com.google.common.collect.ImmutableList)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Set (java.util.Set)2 TimeoutException (java.util.concurrent.TimeoutException)2 Field (org.apache.arrow.vector.types.pojo.Field)2