Search in sources :

Example 1 with IncrementingValueNameProducer

use of com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer in project aws-athena-query-federation by awslabs.

In class DynamoDBMetadataHandler, method enhancePartitionSchema.

/**
 * Fills the partition schema with metadata computed from the predicates of the request. That
 * metadata is later copied to splits in the #doGetSplits call. This is where the decision is made
 * between "partitioning" on the hash key (query) and falling back to a scan.
 *
 * @param partitionSchemaBuilder builder that receives the partition fields and metadata entries
 * @param request the table layout request supplying the schema, table name and constraints
 * @throws RuntimeException wrapping the {@link TimeoutException} raised while resolving table metadata
 * @see GlueMetadataHandler
 */
@Override
public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request) {
    // The schema may carry the source table name (the Glue table name can differ from the actual one);
    // otherwise use the name given in the request.
    String sourceTableName = getSourceTableName(request.getSchema());
    String resolvedName = (sourceTableName != null) ? sourceTableName : request.getTableName().getTableName();

    final DynamoDBTable table;
    try {
        table = tableResolver.getTableMetadata(resolvedName);
    } catch (TimeoutException e) {
        throw new RuntimeException(e);
    }

    // Record the resolved table name so case insensitive resolution is not needed again.
    partitionSchemaBuilder.addMetadata(TABLE_METADATA, table.getName());

    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = new ArrayList<>();
    for (Field field : request.getSchema().getFields()) {
        requestedCols.add(field.getName());
    }

    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());

    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues;
    if (hashKeyValueSet == null) {
        hashKeyValues = Collections.emptyList();
    } else {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet);
    }

    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(request.getSchema());
    Set<String> columnsToIgnore = new HashSet<>();
    List<AttributeValue> valueAccumulator = new ArrayList<>();
    IncrementingValueNameProducer valueNameProducer = new IncrementingValueNameProducer();

    if (hashKeyValues.isEmpty()) {
        // No usable hash key values: always fall back to a scan.
        partitionSchemaBuilder.addField(SEGMENT_COUNT_METADATA, Types.MinorType.INT.getType());
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, SCAN_PARTITION_TYPE);
    } else {
        // Hash key values are available, so we can "partition" on the hash key.
        partitionSchemaBuilder.addField(hashKeyName, hashKeyValueSet.getType());
        partitionSchemaBuilder.addMetadata(HASH_KEY_NAME_METADATA, hashKeyName);
        columnsToIgnore.add(hashKeyName);
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, QUERY_PARTITION_TYPE);

        // Only record the index name when it differs from the table name.
        boolean indexIsTable = table.getName().equals(index.getName());
        if (!indexIsTable) {
            partitionSchemaBuilder.addMetadata(INDEX_METADATA, index.getName());
        }

        // Attach a range key filter when the index has a range key that appears in the predicates.
        Optional<String> rangeKey = index.getRangeKey();
        if (rangeKey.isPresent() && summary.containsKey(rangeKey.get())) {
            String rangeKeyName = rangeKey.get();
            ValueSet rangeKeyValueSet = summary.get(rangeKeyName);
            String rangeKeyFilter = DDBPredicateUtils.generateSingleColumnFilter(
                    rangeKeyName, rangeKeyValueSet, valueAccumulator, valueNameProducer, recordMetadata);
            partitionSchemaBuilder.addMetadata(RANGE_KEY_NAME_METADATA, rangeKeyName);
            partitionSchemaBuilder.addMetadata(RANGE_KEY_FILTER_METADATA, rangeKeyFilter);
            columnsToIgnore.add(rangeKeyName);
        }
    }

    // Columns with custom types are left out of the filter clause when querying/scanning DDB,
    // since those types are not natively supported by DDB or Glue; the results have to be
    // filtered after the query/scan result is returned.
    columnsToIgnore.addAll(recordMetadata.getNonComparableColumns());
    precomputeAdditionalMetadata(columnsToIgnore, summary, valueAccumulator, valueNameProducer, partitionSchemaBuilder, recordMetadata);
}
Also used : IncrementingValueNameProducer(com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) ArrayList(java.util.ArrayList) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DDBRecordMetadata(com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) TimeoutException(java.util.concurrent.TimeoutException) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 2 with IncrementingValueNameProducer

use of com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer in project aws-athena-query-federation by awslabs.

In class DynamoDBMetadataHandler, method precomputeAdditionalMetadata.

/*
    Adds extra metadata to the partition schema: a non-key filter expression for additional
    DDB-side filtering and, when any values were accumulated, JSON mappings for the aliased
    column names and the value placeholders.
 */
private void precomputeAdditionalMetadata(Set<String> columnsToIgnore, Map<String, ValueSet> predicates, List<AttributeValue> accumulator, IncrementingValueNameProducer valueNameProducer, SchemaBuilder partitionsSchemaBuilder, DDBRecordMetadata recordMetadata) {
    // Precompute the non-key filter; it is only stored when one was generated.
    String nonKeyFilter = DDBPredicateUtils.generateFilterExpression(columnsToIgnore, predicates, accumulator, valueNameProducer, recordMetadata);
    if (nonKeyFilter != null) {
        partitionsSchemaBuilder.addMetadata(NON_KEY_FILTER_METADATA, nonKeyFilter);
    }

    // Without accumulated values there are no name/value mappings to publish.
    if (accumulator.isEmpty()) {
        return;
    }

    // Map each aliased column name back to the original predicate column.
    Map<String, String> aliasToColumn = new HashMap<>();
    predicates.keySet().forEach(column -> aliasToColumn.put(DDBPredicateUtils.aliasColumn(column), column));

    // IncrementingValueNameProducer is repeatable for simplicity, so a fresh producer is used
    // here to name the accumulated values in order.
    Map<String, AttributeValue> placeholderToValue = new HashMap<>();
    IncrementingValueNameProducer replayProducer = new IncrementingValueNameProducer();
    for (AttributeValue accumulated : accumulator) {
        String placeholder = replayProducer.getNext();
        placeholderToValue.put(placeholder, accumulated);
    }

    partitionsSchemaBuilder.addMetadata(EXPRESSION_NAMES_METADATA, Jackson.toJsonString(aliasToColumn));
    partitionsSchemaBuilder.addMetadata(EXPRESSION_VALUES_METADATA, Jackson.toJsonString(placeholderToValue));
}
Also used : IncrementingValueNameProducer(com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) HashMap(java.util.HashMap)

Aggregations

IncrementingValueNameProducer (com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer)2 AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)2 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)1 DynamoDBIndex (com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex)1 DynamoDBTable (com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable)1 DDBRecordMetadata (com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 TimeoutException (java.util.concurrent.TimeoutException)1