Use of com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer in project aws-athena-query-federation by awslabs.
From the class DynamoDBMetadataHandler, method enhancePartitionSchema.
/**
 * Builds the partition schema and attaches metadata derived from the request's predicates. The
 * decision between partitioning by hash key (query) and falling back to a full table scan is made
 * here; the resulting metadata is meant to be copied to splits in the #doGetSplits call.
 *
 * @param partitionSchemaBuilder builder that receives the partition field and the metadata entries
 * @param request table layout request supplying the schema, table name and constraint summary
 * @throws RuntimeException wrapping the TimeoutException raised while resolving table metadata
 * @see GlueMetadataHandler
 */
@Override
public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request) {
    // prefer the source table name stored in the schema (the Glue table name may differ from the actual one)
    String schemaTableName = getSourceTableName(request.getSchema());
    String tableName = (schemaTableName != null) ? schemaTableName : request.getTableName().getTableName();

    final DynamoDBTable table;
    try {
        table = tableResolver.getTableMetadata(tableName);
    } catch (TimeoutException e) {
        throw new RuntimeException(e);
    }

    // record the resolved name so case insensitive resolution does not have to be repeated
    partitionSchemaBuilder.addMetadata(TABLE_METADATA, table.getName());

    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = new ArrayList<>();
    for (Field field : request.getSchema().getFields()) {
        requestedCols.add(field.getName());
    }

    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());

    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues;
    if (hashKeyValueSet == null) {
        // no predicate on the hash key at all
        hashKeyValues = Collections.emptyList();
    } else {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet);
    }

    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(request.getSchema());
    Set<String> columnsToIgnore = new HashSet<>();
    List<AttributeValue> valueAccumulator = new ArrayList<>();
    IncrementingValueNameProducer valueNameProducer = new IncrementingValueNameProducer();

    if (hashKeyValues.isEmpty()) {
        // nothing to partition on: always fall back to a scan
        partitionSchemaBuilder.addField(SEGMENT_COUNT_METADATA, Types.MinorType.INT.getType());
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, SCAN_PARTITION_TYPE);
    } else {
        // can "partition" on hash key
        partitionSchemaBuilder.addField(hashKeyName, hashKeyValueSet.getType());
        partitionSchemaBuilder.addMetadata(HASH_KEY_NAME_METADATA, hashKeyName);
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, QUERY_PARTITION_TYPE);
        columnsToIgnore.add(hashKeyName);

        // the index name is only recorded when it differs from the table name
        boolean indexDiffersFromTable = !table.getName().equals(index.getName());
        if (indexDiffersFromTable) {
            partitionSchemaBuilder.addMetadata(INDEX_METADATA, index.getName());
        }

        // add a range key filter when the index has a range key and the predicates constrain it
        Optional<String> rangeKey = index.getRangeKey();
        if (rangeKey.isPresent() && summary.containsKey(rangeKey.get())) {
            String rangeKeyName = rangeKey.get();
            String rangeKeyFilter = DDBPredicateUtils.generateSingleColumnFilter(
                    rangeKeyName, summary.get(rangeKeyName), valueAccumulator, valueNameProducer, recordMetadata);
            partitionSchemaBuilder.addMetadata(RANGE_KEY_NAME_METADATA, rangeKeyName);
            partitionSchemaBuilder.addMetadata(RANGE_KEY_FILTER_METADATA, rangeKeyFilter);
            columnsToIgnore.add(rangeKeyName);
        }
    }

    // Columns with custom types are left out of the filter clause when querying/scanning DDB,
    // because those types are not natively supported by DDB or Glue; such columns have to be
    // filtered after the query/scan result is returned.
    columnsToIgnore.addAll(recordMetadata.getNonComparableColumns());
    precomputeAdditionalMetadata(columnsToIgnore, summary, valueAccumulator, valueNameProducer, partitionSchemaBuilder, recordMetadata);
}
Use of com.amazonaws.athena.connectors.dynamodb.util.IncrementingValueNameProducer in project aws-athena-query-federation by awslabs.
From the class DynamoDBMetadataHandler, method precomputeAdditionalMetadata.
/**
 * Injects additional metadata into the partition schema: a non-key filter expression for
 * additional DDB-side filtering and, when any expression values were accumulated, the
 * JSON-encoded mappings for aliased column names and value placeholders.
 *
 * @param columnsToIgnore columns passed to the filter-expression generator as columns to skip
 * @param predicates constraint summary keyed by column name
 * @param accumulator expression values collected so far; entries added by the caller are included
 *                    in the emitted value mapping
 * @param valueNameProducer placeholder-name producer handed to the filter-expression generator
 * @param partitionsSchemaBuilder builder that receives the metadata entries
 * @param recordMetadata record metadata handed to the filter-expression generator
 */
private void precomputeAdditionalMetadata(Set<String> columnsToIgnore, Map<String, ValueSet> predicates, List<AttributeValue> accumulator, IncrementingValueNameProducer valueNameProducer, SchemaBuilder partitionsSchemaBuilder, DDBRecordMetadata recordMetadata) {
    // precompute non-key filter; a null result means there is nothing to store
    String nonKeyFilter = DDBPredicateUtils.generateFilterExpression(columnsToIgnore, predicates, accumulator, valueNameProducer, recordMetadata);
    if (nonKeyFilter != null) {
        partitionsSchemaBuilder.addMetadata(NON_KEY_FILTER_METADATA, nonKeyFilter);
    }

    // without accumulated values there are no name/value mappings to publish
    if (accumulator.isEmpty()) {
        return;
    }

    // alias -> real column name, for every column that appears in the predicates
    Map<String, String> aliasToColumn = new HashMap<>();
    predicates.keySet().forEach(column -> aliasToColumn.put(DDBPredicateUtils.aliasColumn(column), column));

    // IncrementingValueNameProducer is repeatable for simplicity, so a fresh instance is used to
    // name the accumulated values in order.
    // NOTE(review): relies on a new producer yielding the same names as valueNameProducer did - confirm
    IncrementingValueNameProducer placeholderNames = new IncrementingValueNameProducer();
    Map<String, AttributeValue> placeholderToValue = new HashMap<>();
    accumulator.forEach(value -> placeholderToValue.put(placeholderNames.getNext(), value));

    partitionsSchemaBuilder.addMetadata(EXPRESSION_NAMES_METADATA, Jackson.toJsonString(aliasToColumn));
    partitionsSchemaBuilder.addMetadata(EXPRESSION_VALUES_METADATA, Jackson.toJsonString(placeholderToValue));
}
Aggregations