Use of com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata in the project aws-athena-query-federation by awslabs.
From the class DynamoDBMetadataHandler, method enhancePartitionSchema:
/**
 * Generates a partition schema with metadata derived from available predicates. This metadata will be
 * copied to splits in the #doGetSplits call. At this point it is determined whether we can partition
 * by hash key or fall back to a full table scan.
 *
 * @see GlueMetadataHandler
 */
@Override
public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request) {
    // use the source table name from the schema if available (in case Glue table name != actual table name)
    String tableName = getSourceTableName(request.getSchema());
    if (tableName == null) {
        tableName = request.getTableName().getTableName();
    }
    DynamoDBTable table = null;
    try {
        table = tableResolver.getTableMetadata(tableName);
    } catch (TimeoutException e) {
        throw new RuntimeException(e);
    }
    // add table name so we don't have to do case insensitive resolution again
    partitionSchemaBuilder.addMetadata(TABLE_METADATA, table.getName());
    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = request.getSchema().getFields().stream().map(Field::getName).collect(Collectors.toList());
    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());
    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues = (hashKeyValueSet != null) ? DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet) : Collections.emptyList();
    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(request.getSchema());
    Set<String> columnsToIgnore = new HashSet<>();
    List<AttributeValue> valueAccumulator = new ArrayList<>();
    IncrementingValueNameProducer valueNameProducer = new IncrementingValueNameProducer();
    if (!hashKeyValues.isEmpty()) {
        // can "partition" on hash key
        partitionSchemaBuilder.addField(hashKeyName, hashKeyValueSet.getType());
        partitionSchemaBuilder.addMetadata(HASH_KEY_NAME_METADATA, hashKeyName);
        columnsToIgnore.add(hashKeyName);
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, QUERY_PARTITION_TYPE);
        if (!table.getName().equals(index.getName())) {
            partitionSchemaBuilder.addMetadata(INDEX_METADATA, index.getName());
        }
        // add range key filter if there is one
        Optional<String> rangeKey = index.getRangeKey();
        if (rangeKey.isPresent()) {
            String rangeKeyName = rangeKey.get();
            if (summary.containsKey(rangeKeyName)) {
                String rangeKeyFilter = DDBPredicateUtils.generateSingleColumnFilter(rangeKeyName, summary.get(rangeKeyName), valueAccumulator, valueNameProducer, recordMetadata);
                partitionSchemaBuilder.addMetadata(RANGE_KEY_NAME_METADATA, rangeKeyName);
                partitionSchemaBuilder.addMetadata(RANGE_KEY_FILTER_METADATA, rangeKeyFilter);
                columnsToIgnore.add(rangeKeyName);
            }
        }
    } else {
        // always fall back to a scan
        partitionSchemaBuilder.addField(SEGMENT_COUNT_METADATA, Types.MinorType.INT.getType());
        partitionSchemaBuilder.addMetadata(PARTITION_TYPE_METADATA, SCAN_PARTITION_TYPE);
    }
    // We will exclude the columns with custom types from filter clause when querying/scanning DDB
    // As those types are not natively supported by DDB or Glue
    // So we have to filter the results after the query/scan result is returned
    columnsToIgnore.addAll(recordMetadata.getNonComparableColumns());
    precomputeAdditionalMetadata(columnsToIgnore, summary, valueAccumulator, valueNameProducer, partitionSchemaBuilder, recordMetadata);
}
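As the Javadoc above notes, the metadata written into the partition schema is copied onto each split, and the record handler later reads it back to decide between a DynamoDB Query and a parallel Scan. The following is a minimal, hypothetical sketch of that consumption side; the helper method name is invented, and it assumes the same metadata constants (statically imported, as in the connector) together with the Split#getProperty accessor used in readWithConstraint below.

// Illustrative sketch only, not the connector's actual doGetSplits/getIterator code.
private void describeSplit(Split split) {
    if (QUERY_PARTITION_TYPE.equals(split.getProperty(PARTITION_TYPE_METADATA))) {
        // hash-key "query" partition: the split carries a specific hash key value to Query
        String hashKeyName = split.getProperty(HASH_KEY_NAME_METADATA);
        String indexName = split.getProperty(INDEX_METADATA); // null when the base table itself was chosen
        String rangeKeyFilter = split.getProperty(RANGE_KEY_FILTER_METADATA); // null when no range key predicate exists
        logger.info("Query partition: hashKey={}, index={}, rangeKeyFilter={}", hashKeyName, indexName, rangeKeyFilter);
    } else {
        // SCAN_PARTITION_TYPE: the table is read with a segmented, parallel Scan instead
        logger.info("Scan partition");
    }
}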
Use of com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata in the project aws-athena-query-federation by awslabs.
From the class DynamoDBRecordHandler, method readWithConstraint:
/**
 * Reads data from DynamoDB by submitting either a Query or a Scan, depending
 * on the type of split, and includes any filters specified in the split.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws ExecutionException {
    Split split = recordsRequest.getSplit();
    // use the property instead of the request table name because of case sensitivity
    String tableName = split.getProperty(TABLE_METADATA);
    invokerCache.get(tableName).setBlockSpiller(spiller);
    Iterator<Map<String, AttributeValue>> itemIterator = getIterator(split, tableName, recordsRequest.getSchema());
    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(recordsRequest.getSchema());
    DynamoDBFieldResolver resolver = new DynamoDBFieldResolver(recordMetadata);
    long numRows = 0;
    AtomicLong numResultRows = new AtomicLong(0);
    while (itemIterator.hasNext()) {
        if (!queryStatusChecker.isQueryRunning()) {
            // we can stop processing because the query waiting for this data has already terminated
            return;
        }
        numRows++;
        spiller.writeRows((Block block, int rowNum) -> {
            Map<String, AttributeValue> item = itemIterator.next();
            if (item == null) {
                // the lazy item iterator had not made any DDB calls yet, and there may be zero items returned when it does
                return 0;
            }
            boolean matched = true;
            numResultRows.getAndIncrement();
            // TODO refactor to use GeneratedRowWriter to improve performance
            for (Field nextField : recordsRequest.getSchema().getFields()) {
                Object value = ItemUtils.toSimpleValue(item.get(nextField.getName()));
                Types.MinorType fieldType = Types.getMinorTypeForArrowType(nextField.getType());
                value = DDBTypeUtils.coerceValueToExpectedType(value, nextField, fieldType, recordMetadata);
                try {
                    switch (fieldType) {
                        case LIST:
                            // DDB may return Set so coerce to List. Also coerce each List item to the correct type.
                            List valueAsList = value != null ? DDBTypeUtils.coerceListToExpectedType(value, nextField, recordMetadata) : null;
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, resolver, valueAsList);
                            break;
                        case STRUCT:
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, resolver, value);
                            break;
                        default:
                            matched &= block.offerValue(nextField.getName(), rowNum, value);
                            break;
                    }
                    if (!matched) {
                        return 0;
                    }
                } catch (Exception ex) {
                    throw new RuntimeException("Error while processing field " + nextField.getName(), ex);
                }
            }
            return 1;
        });
    }
    logger.info("readWithConstraint: numRows[{}] numResultRows[{}]", numRows, numResultRows.get());
}
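Each raw DynamoDB item is converted field by field with ItemUtils.toSimpleValue and only then coerced to the Arrow type the schema expects. The small standalone sketch below shows just that first conversion step; the class name and attribute names/values are invented for illustration.

import com.amazonaws.services.dynamodbv2.document.ItemUtils;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import java.util.HashMap;
import java.util.Map;

public class ToSimpleValueExample {
    public static void main(String[] args) {
        Map<String, AttributeValue> item = new HashMap<>();
        item.put("pk", new AttributeValue().withS("customer#42")); // String attribute
        item.put("order_total", new AttributeValue().withN("19.99")); // Number attribute

        // ItemUtils.toSimpleValue maps each AttributeValue to a plain Java object
        // (String, BigDecimal, Boolean, List, Map, ...); readWithConstraint then coerces
        // that object to the expected Arrow minor type via DDBTypeUtils.
        for (Map.Entry<String, AttributeValue> entry : item.entrySet()) {
            Object simple = ItemUtils.toSimpleValue(entry.getValue());
            System.out.println(entry.getKey() + " -> " + simple + " (" + simple.getClass().getSimpleName() + ")");
        }
    }
}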