Search in sources :

Example 1 with SEGMENT_COUNT_METADATA

Use of com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA in the project aws-athena-query-federation by awslabs.

From the class DynamoDBRecordHandler, method buildReadRequest.

/**
 * Converts a split into a DynamoDB read request.
 *
 * <p>A split that carries no {@code SEGMENT_ID_PROPERTY} becomes a {@link QueryRequest} keyed on the
 * split's hash key; a split that carries one becomes a {@link ScanRequest} over that segment.
 * In both cases only the columns present in {@code schema} are projected.
 *
 * @param split     the split whose properties describe the filters, keys and segment to read
 * @param tableName the table the request is issued against
 * @param schema    the schema whose fields make up the projection expression
 * @return a {@link QueryRequest} or a {@link ScanRequest}
 * @throws RuntimeException wrapping the {@link IOException} raised when the serialized expression
 *                          attribute names or values in the split cannot be read
 */
private AmazonWebServiceRequest buildReadRequest(Split split, String tableName, Schema schema) {
    validateExpectedMetadata(split.getProperties());

    // Filter expressions are carried in the split's properties; either may be null.
    final String rangeKeyFilter = split.getProperty(RANGE_KEY_FILTER_METADATA);
    final String nonKeyFilter = split.getProperty(NON_KEY_FILTER_METADATA);

    final Map<String, String> attributeNames = new HashMap<>();
    final Map<String, AttributeValue> attributeValues = new HashMap<>();

    // The serialized name/value maps are only read when at least one filter is present.
    final boolean hasFilter = rangeKeyFilter != null || nonKeyFilter != null;
    if (hasFilter) {
        try {
            attributeNames.putAll(
                    Jackson.getObjectMapper().readValue(
                            split.getProperty(EXPRESSION_NAMES_METADATA),
                            STRING_MAP_TYPE_REFERENCE));
            attributeValues.putAll(
                    Jackson.getObjectMapper().readValue(
                            split.getProperty(EXPRESSION_VALUES_METADATA),
                            ATTRIBUTE_VALUE_MAP_TYPE_REFERENCE));
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    }

    // Project only the schema's columns: alias each one, register the alias, and join with commas.
    final StringBuilder projection = new StringBuilder();
    String separator = "";
    for (Field column : schema.getFields()) {
        final String columnName = column.getName();
        final String columnAlias = DDBPredicateUtils.aliasColumn(columnName);
        attributeNames.put(columnAlias, columnName);
        projection.append(separator).append(columnAlias);
        separator = ",";
    }
    final String projectionExpression = projection.toString();

    // A segment id on the split means this is a scan of that segment.
    final String segmentIdValue = split.getProperty(SEGMENT_ID_PROPERTY);
    if (segmentIdValue != null) {
        final int segmentId = Integer.parseInt(segmentIdValue);
        final int segmentCount = Integer.parseInt(split.getProperty(SEGMENT_COUNT_METADATA));
        // The scan request is given null rather than an empty map.
        final Map<String, String> scanNames = attributeNames.isEmpty() ? null : attributeNames;
        final Map<String, AttributeValue> scanValues = attributeValues.isEmpty() ? null : attributeValues;
        return new ScanRequest()
                .withTableName(tableName)
                .withSegment(segmentId)
                .withTotalSegments(segmentCount)
                .withFilterExpression(nonKeyFilter)
                .withExpressionAttributeNames(scanNames)
                .withExpressionAttributeValues(scanValues)
                .withProjectionExpression(projectionExpression);
    }

    // Otherwise this is a query: build the key condition from the hash key and optional range filter.
    final String indexName = split.getProperty(INDEX_METADATA);
    final String hashKeyName = split.getProperty(HASH_KEY_NAME_METADATA);
    final String hashKeyAlias = DDBPredicateUtils.aliasColumn(hashKeyName);
    final String hashKeyCondition = hashKeyAlias + " = " + HASH_KEY_VALUE_ALIAS;
    final String keyConditionExpression = rangeKeyFilter == null
            ? hashKeyCondition
            : hashKeyCondition + " AND " + rangeKeyFilter;
    attributeNames.put(hashKeyAlias, hashKeyName);
    // The hash key's value is stored in the split under the hash key's own name, as JSON.
    attributeValues.put(
            HASH_KEY_VALUE_ALIAS,
            Jackson.fromJsonString(split.getProperty(hashKeyName), AttributeValue.class));
    return new QueryRequest()
            .withTableName(tableName)
            .withIndexName(indexName)
            .withKeyConditionExpression(keyConditionExpression)
            .withFilterExpression(nonKeyFilter)
            .withExpressionAttributeNames(attributeNames)
            .withExpressionAttributeValues(attributeValues)
            .withProjectionExpression(projectionExpression);
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) LoadingCache(com.google.common.cache.LoadingCache) Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) SEGMENT_COUNT_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA) TimeoutException(java.util.concurrent.TimeoutException) Block(com.amazonaws.athena.connector.lambda.data.Block) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) DDBRecordMetadata(com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata) TypeReference(com.fasterxml.jackson.core.type.TypeReference) RANGE_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_FILTER_METADATA) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) Split(com.amazonaws.athena.connector.lambda.domain.Split) Field(org.apache.arrow.vector.types.pojo.Field) EXPRESSION_NAMES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_NAMES_METADATA) Collectors(java.util.stream.Collectors) CacheLoader(com.google.common.cache.CacheLoader) EXPRESSION_VALUES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_VALUES_METADATA) List(java.util.List) CacheBuilder(com.google.common.cache.CacheBuilder) ThrottlingInvoker(com.amazonaws.athena.connector.lambda.ThrottlingInvoker) QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) ScanResult(com.amazonaws.services.dynamodbv2.model.ScanResult) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) Jackson(com.amazonaws.util.json.Jackson) DynamoDBFieldResolver(com.amazonaws.athena.connectors.dynamodb.resolver.DynamoDBFieldResolver) 
DDBTypeUtils(com.amazonaws.athena.connectors.dynamodb.util.DDBTypeUtils) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) AmazonS3(com.amazonaws.services.s3.AmazonS3) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) INDEX_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.INDEX_METADATA) NON_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.NON_KEY_FILTER_METADATA) HASH_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.HASH_KEY_NAME_METADATA) AmazonWebServiceRequest(com.amazonaws.AmazonWebServiceRequest) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Iterator(java.util.Iterator) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) AmazonDynamoDBClientBuilder(com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) IOException(java.io.IOException) SEGMENT_ID_PROPERTY(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_ID_PROPERTY) AmazonDynamoDB(com.amazonaws.services.dynamodbv2.AmazonDynamoDB) TABLE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.TABLE_METADATA) QueryResult(com.amazonaws.services.dynamodbv2.model.QueryResult) VisibleForTesting(org.apache.arrow.util.VisibleForTesting) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) DDBPredicateUtils(com.amazonaws.athena.connectors.dynamodb.util.DDBPredicateUtils) RecordHandler(com.amazonaws.athena.connector.lambda.handlers.RecordHandler) EXCEPTION_FILTER(com.amazonaws.athena.connectors.dynamodb.throttling.DynamoDBExceptionFilter.EXCEPTION_FILTER) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) 
QueryRequest(com.amazonaws.services.dynamodbv2.model.QueryRequest) HashMap(java.util.HashMap) IOException(java.io.IOException)

Aggregations

AmazonWebServiceRequest (com.amazonaws.AmazonWebServiceRequest)1 QueryStatusChecker (com.amazonaws.athena.connector.lambda.QueryStatusChecker)1 ThrottlingInvoker (com.amazonaws.athena.connector.lambda.ThrottlingInvoker)1 Block (com.amazonaws.athena.connector.lambda.data.Block)1 BlockSpiller (com.amazonaws.athena.connector.lambda.data.BlockSpiller)1 Split (com.amazonaws.athena.connector.lambda.domain.Split)1 RecordHandler (com.amazonaws.athena.connector.lambda.handlers.RecordHandler)1 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)1 EXPRESSION_NAMES_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_NAMES_METADATA)1 EXPRESSION_VALUES_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_VALUES_METADATA)1 HASH_KEY_NAME_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.HASH_KEY_NAME_METADATA)1 INDEX_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.INDEX_METADATA)1 NON_KEY_FILTER_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.NON_KEY_FILTER_METADATA)1 RANGE_KEY_FILTER_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_FILTER_METADATA)1 SEGMENT_COUNT_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA)1 SEGMENT_ID_PROPERTY (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_ID_PROPERTY)1 TABLE_METADATA (com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.TABLE_METADATA)1 DynamoDBFieldResolver (com.amazonaws.athena.connectors.dynamodb.resolver.DynamoDBFieldResolver)1 EXCEPTION_FILTER (com.amazonaws.athena.connectors.dynamodb.throttling.DynamoDBExceptionFilter.EXCEPTION_FILTER)1 DDBPredicateUtils (com.amazonaws.athena.connectors.dynamodb.util.DDBPredicateUtils)1