Search in sources:

Example 1 with DynamoDBFieldResolver

Use of com.amazonaws.athena.connectors.dynamodb.resolver.DynamoDBFieldResolver in project aws-athena-query-federation by awslabs.

From the class DynamoDBRecordHandler, method readWithConstraint:

/**
 * Reads data from DynamoDB by submitting either a Query or a Scan, depending
 * on the type of split, and includes any filters specified in the split.
 *
 * @param spiller receives result rows and spills them into response blocks
 * @param recordsRequest the read request carrying the split and the target schema
 * @param queryStatusChecker polled each iteration so we can stop early once the
 *        downstream query has been terminated
 * @throws ExecutionException if the per-table invoker cannot be obtained from the cache
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws ExecutionException {
    Split split = recordsRequest.getSplit();
    // use the property instead of the request table name because of case sensitivity
    String tableName = split.getProperty(TABLE_METADATA);
    invokerCache.get(tableName).setBlockSpiller(spiller);
    Iterator<Map<String, AttributeValue>> itemIterator = getIterator(split, tableName, recordsRequest.getSchema());
    DDBRecordMetadata recordMetadata = new DDBRecordMetadata(recordsRequest.getSchema());
    DynamoDBFieldResolver resolver = new DynamoDBFieldResolver(recordMetadata);
    long numRows = 0;
    // AtomicLong (not a plain long) so the counter can be mutated from inside the writeRows lambda
    AtomicLong numResultRows = new AtomicLong(0);
    while (itemIterator.hasNext()) {
        if (!queryStatusChecker.isQueryRunning()) {
            // we can stop processing because the query waiting for this data has already terminated
            return;
        }
        numRows++;
        spiller.writeRows((Block block, int rowNum) -> {
            // the shared iterator is advanced inside the lambda, so each writeRows
            // invocation consumes exactly one DynamoDB item
            Map<String, AttributeValue> item = itemIterator.next();
            if (item == null) {
                // had not made any DDB calls yet and there may be zero items returned when it does
                return 0;
            }
            boolean matched = true;
            numResultRows.getAndIncrement();
            // TODO refactor to use GeneratedRowWriter to improve performance
            for (Field nextField : recordsRequest.getSchema().getFields()) {
                Object value = ItemUtils.toSimpleValue(item.get(nextField.getName()));
                Types.MinorType fieldType = Types.getMinorTypeForArrowType(nextField.getType());
                value = DDBTypeUtils.coerceValueToExpectedType(value, nextField, fieldType, recordMetadata);
                try {
                    switch (fieldType) {
                        case LIST:
                            // DDB may return Set so coerce to List. Also coerce each List item to the correct type.
                            // Wildcard type replaces the previous raw List to avoid unchecked-warning noise.
                            List<?> valueAsList = value != null ? DDBTypeUtils.coerceListToExpectedType(value, nextField, recordMetadata) : null;
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, resolver, valueAsList);
                            break;
                        case STRUCT:
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, resolver, value);
                            break;
                        default:
                            matched &= block.offerValue(nextField.getName(), rowNum, value);
                            break;
                    }
                    if (!matched) {
                        // a constraint rejected this row; report zero rows written for this item
                        return 0;
                    }
                } catch (Exception ex) {
                    // wrap with the field name for diagnosability, preserving the original cause
                    throw new RuntimeException("Error while processing field " + nextField.getName(), ex);
                }
            }
            return 1;
        });
    }
    logger.info("readWithConstraint: numRows[{}] numResultRows[{}]", numRows, numResultRows.get());
}
Also used: Types(org.apache.arrow.vector.types.Types) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) DynamoDBFieldResolver(com.amazonaws.athena.connectors.dynamodb.resolver.DynamoDBFieldResolver) Field(org.apache.arrow.vector.types.pojo.Field) AtomicLong(java.util.concurrent.atomic.AtomicLong) Block(com.amazonaws.athena.connector.lambda.data.Block) List(java.util.List) Split(com.amazonaws.athena.connector.lambda.domain.Split) Map(java.util.Map) HashMap(java.util.HashMap) DDBRecordMetadata(com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata)

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)1 Split (com.amazonaws.athena.connector.lambda.domain.Split)1 DynamoDBFieldResolver (com.amazonaws.athena.connectors.dynamodb.resolver.DynamoDBFieldResolver)1 DDBRecordMetadata (com.amazonaws.athena.connectors.dynamodb.util.DDBRecordMetadata)1 AttributeValue (com.amazonaws.services.dynamodbv2.model.AttributeValue)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Types (org.apache.arrow.vector.types.Types)1 Field (org.apache.arrow.vector.types.pojo.Field)1