Search in sources :

Example 1 with MAX_SPLITS_PER_REQUEST

Use of com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.MAX_SPLITS_PER_REQUEST in the project aws-athena-query-federation by awslabs.

From the class DynamoDBMetadataHandlerTest, method doGetSplitsQuery.

/**
 * Exercises split pagination in {@code doGetSplits} for a constraint on {@code col_0}.
 *
 * <p>The test builds an equatable VARCHAR value set holding exactly two pages' worth of
 * distinct values ({@code 2 * MAX_SPLITS_PER_REQUEST}), obtains a table layout for it, and
 * then requests splits twice:
 * <ol>
 *   <li>the first response must carry {@code MAX_SPLITS_PER_REQUEST} splits with distinct
 *       {@code col_0} properties and a continuation token equal to
 *       {@code MAX_SPLITS_PER_REQUEST - 1};</li>
 *   <li>the second response, requested with that token, must carry another
 *       {@code MAX_SPLITS_PER_REQUEST} distinct splits and no continuation token.</li>
 * </ol>
 *
 * @throws Exception if the handler fails while producing the layout or the splits
 */
@Test
public void doGetSplitsQuery() throws Exception {
    // Exactly two full pages; derived from the constant so the test tracks the page size.
    int totalValues = 2 * MAX_SPLITS_PER_REQUEST;

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    EquatableValueSet.Builder valueSet = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false);
    for (int i = 0; i < totalValues; i++) {
        valueSet.add("test_str_" + i);
    }
    constraintsMap.put("col_0", valueSet.build());

    // Collections.emptySet() instead of the raw Collections.EMPTY_SET avoids an unchecked conversion.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    GetSplitsRequest req = new GetSplitsRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, layoutResponse.getPartitions(), ImmutableList.of("col_0"), new Constraints(new HashMap<>()), null);
    logger.info("doGetSplits: req[{}]", req);

    // First page: full page of distinct splits plus a token pointing at the last index served.
    GetSplitsResponse response = handler.doGetSplits(allocator, req);
    assertThat(response.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));
    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    assertThat(continuationToken, equalTo(String.valueOf(MAX_SPLITS_PER_REQUEST - 1)));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));

    // Second page: resume from the token; this must drain the remaining splits.
    response = handler.doGetSplits(allocator, new GetSplitsRequest(req, continuationToken));
    // Log the token of THIS response, not the stale one from the first page.
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", response.getContinuationToken(), response.getSplits().size());
    assertThat(response.getContinuationToken(), equalTo(null));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));
}
Also used : Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) SEGMENT_COUNT_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SOURCE_TABLE_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.SOURCE_TABLE_PROPERTY) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) MAX_SPLITS_PER_REQUEST(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.MAX_SPLITS_PER_REQUEST) After(org.junit.After) Map(java.util.Map) DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED) DYNAMO_DB_FLAG(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.DYNAMO_DB_FLAG) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ZoneOffset(java.time.ZoneOffset) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) COLUMN_NAME_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.COLUMN_NAME_MAPPING_PROPERTY) RANGE_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_FILTER_METADATA) ListSchemasResponse(com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse) AmazonServiceException(com.amazonaws.AmazonServiceException) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) Split(com.amazonaws.athena.connector.lambda.domain.Split) Column(com.amazonaws.services.glue.model.Column) PARTITION_TYPE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.PARTITION_TYPE_METADATA) 
EXPRESSION_NAMES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_NAMES_METADATA) Instant(java.time.Instant) QUERY_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.QUERY_PARTITION_TYPE) Collectors(java.util.stream.Collectors) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Matchers.any(org.mockito.Matchers.any) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) EXPRESSION_VALUES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_VALUES_METADATA) RANGE_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_NAME_METADATA) List(java.util.List) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) ListSchemasRequest(com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Iterables(com.google.common.collect.Iterables) DATETIME_FORMAT_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY) SCAN_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SCAN_PARTITION_TYPE) Mock(org.mockito.Mock) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) Jackson(com.amazonaws.util.json.Jackson) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) ImmutableList(com.google.common.collect.ImmutableList) 
DEFAULT_SCHEMA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.DEFAULT_SCHEMA) AWSGlue(com.amazonaws.services.glue.AWSGlue) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) Table(com.amazonaws.services.glue.model.Table) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) INDEX_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.INDEX_METADATA) ListTablesResponse(com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse) NON_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.NON_KEY_FILTER_METADATA) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Before(org.junit.Before) HASH_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.HASH_KEY_NAME_METADATA) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) UNLIMITED_PAGE_SIZE_VALUE(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest.UNLIMITED_PAGE_SIZE_VALUE) SEGMENT_ID_PROPERTY(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_ID_PROPERTY) Mockito.when(org.mockito.Mockito.when) MetadataRequestType(com.amazonaws.athena.connector.lambda.metadata.MetadataRequestType) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TABLE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.TABLE_METADATA) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) Rule(org.junit.Rule) 
MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) ChronoUnit(java.time.temporal.ChronoUnit) ListTablesRequest(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest) Database(com.amazonaws.services.glue.model.Database) Collections(java.util.Collections) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

AmazonServiceException (com.amazonaws.AmazonServiceException)1 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)1 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)1 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)1 Split (com.amazonaws.athena.connector.lambda.domain.Split)1 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)1 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)1 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)1 Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)1 SortedRangeSet (com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)1 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)1 COLUMN_NAME_MAPPING_PROPERTY (com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.COLUMN_NAME_MAPPING_PROPERTY)1 DATETIME_FORMAT_MAPPING_PROPERTY (com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY)1 DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED (com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED)1 SOURCE_TABLE_PROPERTY (com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.SOURCE_TABLE_PROPERTY)1 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)1 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)1 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)1 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)1 GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest)1