Search in sources:

Example 31 with GetTableLayoutRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest in project aws-athena-query-federation by awslabs.

the class DataLakeGen2MetadataHandlerTest method doGetSplitsWithNoPartition.

/**
 * Runs {@code doGetTableLayout} followed by {@code doGetSplits} for
 * {@code testSchema.testTable} and expects exactly one split whose properties are
 * {@code {PARTITION_NUMBER -> "0"}}.
 *
 * <p>Both block allocators are managed by try-with-resources so that they are released
 * even when the handler throws or an assertion fails.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetSplitsWithNoPartition() throws Exception {
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.dataLakeGen2MetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());

    try (BlockAllocator blockAllocator = new BlockAllocatorImpl();
            BlockAllocator splitBlockAllocator = new BlockAllocatorImpl()) {
        GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
        GetTableLayoutResponse getTableLayoutResponse = this.dataLakeGen2MetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);

        // The partitions produced by the layout call are fed straight into the splits request.
        GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, null);
        GetSplitsResponse getSplitsResponse = this.dataLakeGen2MetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);

        Set<Map<String, String>> expectedSplits = new HashSet<>();
        expectedSplits.add(Collections.singletonMap(DataLakeGen2MetadataHandler.PARTITION_NUMBER, "0"));

        // Compare sizes first so duplicate splits are not masked by the set conversion below.
        Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size());
        Set<Map<String, String>> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet());
        Assert.assertEquals(expectedSplits, actualSplits);
    }
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Map(java.util.Map) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 32 with GetTableLayoutRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest in project aws-athena-query-federation by awslabs.

the class DataLakeGen2MuxMetadataHandlerTest method getPartitions.

/**
 * Calls {@code getPartitions} on {@code jdbcMetadataHandler} with a mocked request whose
 * catalog name is "fakedatabase", then verifies that {@code dataLakeGen2MetadataHandler}
 * received exactly one {@code getPartitions} call with that same request and status checker.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void getPartitions() throws Exception {
    GetTableLayoutRequest layoutRequest = Mockito.mock(GetTableLayoutRequest.class);
    Mockito.when(layoutRequest.getCatalogName()).thenReturn("fakedatabase");
    BlockWriter writer = Mockito.mock(BlockWriter.class);

    this.jdbcMetadataHandler.getPartitions(writer, layoutRequest, queryStatusChecker);

    Mockito.verify(this.dataLakeGen2MetadataHandler, Mockito.times(1))
            .getPartitions(Mockito.any(BlockWriter.class), Mockito.eq(layoutRequest), Mockito.eq(queryStatusChecker));
}
Also used : GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) BlockWriter(com.amazonaws.athena.connector.lambda.data.BlockWriter) Test(org.junit.Test)

Example 33 with GetTableLayoutRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetSplitsQuery.

/**
 * Builds an equality constraint on {@code col_0} with 2000 distinct values, requests the
 * table layout, and then pages through {@code doGetSplits} twice.
 *
 * <p>The first page is expected to hold {@code MAX_SPLITS_PER_REQUEST} distinct splits and
 * a continuation token of {@code MAX_SPLITS_PER_REQUEST - 1}; the second page is expected to
 * hold another {@code MAX_SPLITS_PER_REQUEST} distinct splits and no continuation token.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetSplitsQuery() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    EquatableValueSet.Builder valueSet = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false);
    for (int i = 0; i < 2000; i++) {
        valueSet.add("test_str_" + i);
    }
    constraintsMap.put("col_0", valueSet.build());

    // Collections.emptySet() is the typed replacement for the raw Collections.EMPTY_SET constant.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    GetSplitsRequest req = new GetSplitsRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, layoutResponse.getPartitions(), ImmutableList.of("col_0"), new Constraints(new HashMap<>()), null);
    logger.info("doGetSplits: req[{}]", req);

    // First page.
    GetSplitsResponse response = handler.doGetSplits(allocator, req);
    assertThat(response.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));
    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    assertThat(continuationToken, equalTo(String.valueOf(MAX_SPLITS_PER_REQUEST - 1)));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));

    // Second page, resumed from the first page's continuation token.
    response = handler.doGetSplits(allocator, new GetSplitsRequest(req, continuationToken));
    // Log the token of THIS response, not the stale token that was used to request it.
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", response.getContinuationToken(), response.getSplits().size());
    assertThat(response.getContinuationToken(), equalTo(null));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));
}
Also used : Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) SEGMENT_COUNT_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SOURCE_TABLE_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.SOURCE_TABLE_PROPERTY) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) MAX_SPLITS_PER_REQUEST(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.MAX_SPLITS_PER_REQUEST) After(org.junit.After) Map(java.util.Map) DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED) DYNAMO_DB_FLAG(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.DYNAMO_DB_FLAG) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ZoneOffset(java.time.ZoneOffset) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) COLUMN_NAME_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.COLUMN_NAME_MAPPING_PROPERTY) RANGE_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_FILTER_METADATA) ListSchemasResponse(com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse) AmazonServiceException(com.amazonaws.AmazonServiceException) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) Split(com.amazonaws.athena.connector.lambda.domain.Split) Column(com.amazonaws.services.glue.model.Column) PARTITION_TYPE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.PARTITION_TYPE_METADATA) 
EXPRESSION_NAMES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_NAMES_METADATA) Instant(java.time.Instant) QUERY_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.QUERY_PARTITION_TYPE) Collectors(java.util.stream.Collectors) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Matchers.any(org.mockito.Matchers.any) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) EXPRESSION_VALUES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_VALUES_METADATA) RANGE_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_NAME_METADATA) List(java.util.List) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) ListSchemasRequest(com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Iterables(com.google.common.collect.Iterables) DATETIME_FORMAT_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY) SCAN_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SCAN_PARTITION_TYPE) Mock(org.mockito.Mock) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) Jackson(com.amazonaws.util.json.Jackson) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) ImmutableList(com.google.common.collect.ImmutableList) 
DEFAULT_SCHEMA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.DEFAULT_SCHEMA) AWSGlue(com.amazonaws.services.glue.AWSGlue) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) Table(com.amazonaws.services.glue.model.Table) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) INDEX_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.INDEX_METADATA) ListTablesResponse(com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse) NON_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.NON_KEY_FILTER_METADATA) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Before(org.junit.Before) HASH_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.HASH_KEY_NAME_METADATA) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) UNLIMITED_PAGE_SIZE_VALUE(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest.UNLIMITED_PAGE_SIZE_VALUE) SEGMENT_ID_PROPERTY(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_ID_PROPERTY) Mockito.when(org.mockito.Mockito.when) MetadataRequestType(com.amazonaws.athena.connector.lambda.metadata.MetadataRequestType) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TABLE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.TABLE_METADATA) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) Rule(org.junit.Rule) 
MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) ChronoUnit(java.time.temporal.ChronoUnit) ListTablesRequest(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest) Database(com.amazonaws.services.glue.model.Database) Collections(java.util.Collections) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 34 with GetTableLayoutRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetTableLayoutQueryIndex.

/**
 * Requests a table layout with two equality values on {@code col_4} (DATEDAY) and an
 * inclusive range on {@code col_5} (DATEMILLI), then checks the custom metadata on the
 * returned partition schema: query partition type, index {@code test_index}, hash key
 * {@code col_4}, range key {@code col_5}, the range-key filter expression, and the
 * serialized expression names and values. Two partition rows are expected.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetTableLayoutQueryIndex() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    SortedRangeSet.Builder dateValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEDAY.getType(), false);
    SortedRangeSet.Builder timeValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEMILLI.getType(), false);
    LocalDateTime dateTime = LocalDateTime.of(2019, 9, 23, 11, 18, 37);
    // Days are counted from the Unix epoch (1970-01-01T00:00:00Z). Instant.MIN is the
    // minimum supported instant, not the epoch, and would yield meaningless day counts.
    Instant epoch = Instant.EPOCH;
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime.toInstant(ZoneOffset.UTC))));
    // 26 hours later, so the second value differs from the first.
    LocalDateTime dateTime2 = dateTime.plusHours(26);
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime2.toInstant(ZoneOffset.UTC))));
    long startTime = dateTime.toInstant(ZoneOffset.UTC).toEpochMilli();
    long endTime = dateTime2.toInstant(ZoneOffset.UTC).toEpochMilli();
    timeValueSet.add(Range.range(allocator, Types.MinorType.DATEMILLI.getType(), startTime, true, endTime, true));
    constraintsMap.put("col_4", dateValueSet.build());
    constraintsMap.put("col_5", timeValueSet.build());

    // Collections.emptySet() is the typed replacement for the raw Collections.EMPTY_SET constant.
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    logger.info("doGetTableLayout schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout partitions - {}", res.getPartitions());

    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(PARTITION_TYPE_METADATA), equalTo(QUERY_PARTITION_TYPE));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().containsKey(INDEX_METADATA), is(true));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(INDEX_METADATA), equalTo("test_index"));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(HASH_KEY_NAME_METADATA), equalTo("col_4"));
    assertThat(res.getPartitions().getRowCount(), equalTo(2));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(RANGE_KEY_NAME_METADATA), equalTo("col_5"));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(RANGE_KEY_FILTER_METADATA), equalTo("(#col_5 >= :v0 AND #col_5 <= :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col_4", "col_4", "#col_5", "col_5");
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(startTime), ":v1", ItemUtils.toAttributeValue(endTime));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
Also used : LocalDateTime(java.time.LocalDateTime) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) HashMap(java.util.HashMap) Instant(java.time.Instant) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 35 with GetTableLayoutRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetSplitsScan.

/**
 * Requests a table layout with no constraints and then calls {@code doGetSplits}.
 * Expects a GET_SPLITS response with no continuation token and exactly one split whose
 * {@code SEGMENT_ID_PROPERTY} is "0".
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetSplitsScan() throws Exception {
    // Collections.emptySet() is the typed replacement for the raw Collections.EMPTY_SET constant.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(ImmutableMap.of()), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    GetSplitsRequest req = new GetSplitsRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, layoutResponse.getPartitions(), ImmutableList.of(), new Constraints(new HashMap<>()), null);
    logger.info("doGetSplits: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertThat(rawResponse.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;

    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    // Match on the token itself so a failure reports the unexpected value instead of "false".
    assertThat(continuationToken, equalTo(null));

    // getOnlyElement fails unless the response contains exactly one split.
    Split split = Iterables.getOnlyElement(response.getSplits());
    assertThat(split.getProperty(SEGMENT_ID_PROPERTY), equalTo("0"));
    logger.info("doGetSplitsScan: exit");
}
Also used : Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Aggregations

GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)76 Test (org.junit.Test)71 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)54 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)47 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)42 Schema (org.apache.arrow.vector.types.pojo.Schema)39 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)33 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)27 ResultSet (java.sql.ResultSet)22 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)22 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)17 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)17 PreparedStatement (java.sql.PreparedStatement)17 HashMap (java.util.HashMap)16 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)15 BlockWriter (com.amazonaws.athena.connector.lambda.data.BlockWriter)14 Map (java.util.Map)14 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)12 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)11