Search in sources:

Example 26 with GetSplitsRequest

Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in the project aws-athena-query-federation by awslabs.

From the class DataLakeGen2MuxMetadataHandlerTest, method doGetSplits.

/**
 * Checks that a split request whose catalog name is "fakedatabase" is passed on by
 * the handler under test to the DataLakeGen2 metadata handler exactly once, with the
 * same allocator and the same request object.
 */
@Test
public void doGetSplits() {
    // Stub only the catalog name on the mocked request.
    final GetSplitsRequest request = Mockito.mock(GetSplitsRequest.class);
    Mockito.when(request.getCatalogName()).thenReturn("fakedatabase");

    this.jdbcMetadataHandler.doGetSplits(this.allocator, request);

    Mockito.verify(this.dataLakeGen2MetadataHandler, Mockito.times(1))
            .doGetSplits(Mockito.eq(this.allocator), Mockito.eq(request));
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Test(org.junit.Test)

Example 27 with GetSplitsRequest

Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in the project aws-athena-query-federation by awslabs.

From the class DynamoDBMetadataHandlerTest, method doGetSplitsQuery.

/**
 * Exercises paginated split generation for a query-style request.
 *
 * <p>An equality constraint holding 2000 distinct string values is placed on
 * {@code col_0}. The first {@code doGetSplits} call is expected to return
 * {@code MAX_SPLITS_PER_REQUEST} splits with distinct {@code col_0} properties and a
 * continuation token equal to {@code MAX_SPLITS_PER_REQUEST - 1}. The follow-up call,
 * made with that token, is expected to return another {@code MAX_SPLITS_PER_REQUEST}
 * distinct splits and a {@code null} continuation token.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetSplitsQuery() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    EquatableValueSet.Builder valueSet = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false);
    for (int i = 0; i < 2000; i++) {
        valueSet.add("test_str_" + i);
    }
    constraintsMap.put("col_0", valueSet.build());

    // Collections.emptySet() is the type-safe replacement for the raw Collections.EMPTY_SET constant.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    GetSplitsRequest req = new GetSplitsRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, layoutResponse.getPartitions(), ImmutableList.of("col_0"), new Constraints(new HashMap<>()), null);
    logger.info("doGetSplits: req[{}]", req);

    // First page.
    GetSplitsResponse response = handler.doGetSplits(allocator, req);
    assertThat(response.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));
    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    assertThat(continuationToken, equalTo(String.valueOf(MAX_SPLITS_PER_REQUEST - 1)));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));

    // Second page, requested with the token returned by the first page.
    response = handler.doGetSplits(allocator, new GetSplitsRequest(req, continuationToken));
    // Log the token of THIS response; the local variable still holds the first page's token.
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", response.getContinuationToken(), response.getSplits().size());
    assertThat(response.getContinuationToken(), equalTo(null));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));
}
Also used : Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) SEGMENT_COUNT_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_COUNT_METADATA) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SOURCE_TABLE_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.SOURCE_TABLE_PROPERTY) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) MAX_SPLITS_PER_REQUEST(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.MAX_SPLITS_PER_REQUEST) After(org.junit.After) Map(java.util.Map) DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED) DYNAMO_DB_FLAG(com.amazonaws.athena.connectors.dynamodb.DynamoDBMetadataHandler.DYNAMO_DB_FLAG) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ZoneOffset(java.time.ZoneOffset) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) COLUMN_NAME_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.COLUMN_NAME_MAPPING_PROPERTY) RANGE_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_FILTER_METADATA) ListSchemasResponse(com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse) AmazonServiceException(com.amazonaws.AmazonServiceException) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) Split(com.amazonaws.athena.connector.lambda.domain.Split) Column(com.amazonaws.services.glue.model.Column) PARTITION_TYPE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.PARTITION_TYPE_METADATA) 
EXPRESSION_NAMES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_NAMES_METADATA) Instant(java.time.Instant) QUERY_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.QUERY_PARTITION_TYPE) Collectors(java.util.stream.Collectors) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Matchers.any(org.mockito.Matchers.any) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) EXPRESSION_VALUES_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.EXPRESSION_VALUES_METADATA) RANGE_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.RANGE_KEY_NAME_METADATA) List(java.util.List) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) ListSchemasRequest(com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Iterables(com.google.common.collect.Iterables) DATETIME_FORMAT_MAPPING_PROPERTY(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.DATETIME_FORMAT_MAPPING_PROPERTY) SCAN_PARTITION_TYPE(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SCAN_PARTITION_TYPE) Mock(org.mockito.Mock) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) Jackson(com.amazonaws.util.json.Jackson) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) ImmutableList(com.google.common.collect.ImmutableList) 
DEFAULT_SCHEMA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.DEFAULT_SCHEMA) AWSGlue(com.amazonaws.services.glue.AWSGlue) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) Table(com.amazonaws.services.glue.model.Table) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) INDEX_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.INDEX_METADATA) ListTablesResponse(com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse) NON_KEY_FILTER_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.NON_KEY_FILTER_METADATA) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) Before(org.junit.Before) HASH_KEY_NAME_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.HASH_KEY_NAME_METADATA) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) UNLIMITED_PAGE_SIZE_VALUE(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest.UNLIMITED_PAGE_SIZE_VALUE) SEGMENT_ID_PROPERTY(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.SEGMENT_ID_PROPERTY) Mockito.when(org.mockito.Mockito.when) MetadataRequestType(com.amazonaws.athena.connector.lambda.metadata.MetadataRequestType) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TABLE_METADATA(com.amazonaws.athena.connectors.dynamodb.constants.DynamoDBConstants.TABLE_METADATA) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) Rule(org.junit.Rule) 
MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) ChronoUnit(java.time.temporal.ChronoUnit) ListTablesRequest(com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest) Database(com.amazonaws.services.glue.model.Database) Collections(java.util.Collections) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 28 with GetSplitsRequest

Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in the project aws-athena-query-federation by awslabs.

From the class DynamoDBMetadataHandlerTest, method doGetSplitsScan.

/**
 * Exercises split generation for a request with no constraints and no partition
 * columns. The response is expected to carry no continuation token and exactly one
 * split whose {@code SEGMENT_ID_PROPERTY} is {@code "0"}.
 *
 * @throws Exception if the handler under test fails
 */
@Test
public void doGetSplitsScan() throws Exception {
    GetTableLayoutRequest layoutRequest = new GetTableLayoutRequest(
            TEST_IDENTITY,
            TEST_QUERY_ID,
            TEST_CATALOG_NAME,
            TEST_TABLE_NAME,
            new Constraints(ImmutableMap.of()),
            SchemaBuilder.newBuilder().build(),
            Collections.EMPTY_SET);
    GetTableLayoutResponse layout = handler.doGetTableLayout(allocator, layoutRequest);

    GetSplitsRequest splitsRequest = new GetSplitsRequest(
            TEST_IDENTITY,
            TEST_QUERY_ID,
            TEST_CATALOG_NAME,
            TEST_TABLE_NAME,
            layout.getPartitions(),
            ImmutableList.of(),
            new Constraints(new HashMap<>()),
            null);
    logger.info("doGetSplits: req[{}]", splitsRequest);

    // Check the request type on the generic response before narrowing it.
    MetadataResponse metadataResponse = handler.doGetSplits(allocator, splitsRequest);
    assertThat(metadataResponse.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));

    GetSplitsResponse splitsResponse = (GetSplitsResponse) metadataResponse;
    String token = splitsResponse.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", token, splitsResponse.getSplits().size());
    assertThat(token == null, is(true));

    // getOnlyElement also enforces that exactly one split was returned.
    Split onlySplit = Iterables.getOnlyElement(splitsResponse.getSplits());
    assertThat(onlySplit.getProperty(SEGMENT_ID_PROPERTY), equalTo("0"));
    logger.info("doGetSplitsScan: exit");
}
Also used : Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Example 29 with GetSplitsRequest

Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in the project foundry-athena-query-federation-connector by palantir.

From the class SplitsFetcher, method getSplits.

/**
 * Plans the splits for a request by paging through the metadata service's slices.
 *
 * <p>The catalog locator is read from the custom metadata of the request schema. All
 * constraints in the request summary, if any, are converted and combined into a single
 * AND filter. Slices are then fetched page by page until no next-page token is
 * returned; every slice is turned into a split with a freshly made spill location.
 *
 * @param request the split request supplying schema metadata, constraints and catalog name
 * @param spillLocationFactory source of one spill location per split
 * @param encryptionKey key handed to every created split
 * @return a response for the request's catalog holding all collected splits
 */
GetSplitsResponse getSplits(GetSplitsRequest request, SpillLocationFactory spillLocationFactory, EncryptionKey encryptionKey) {
    CatalogLocator locator = FoundryAthenaObjectMapper.objectMapper()
            .convertValue(request.getSchema().getCustomMetadata(), CatalogLocator.class);

    Optional<Filter> filter = Optional.empty();
    if (!request.getConstraints().getSummary().isEmpty()) {
        // we just push down all constraints which will include those for any partition columns
        filter = Optional.of(Filter.and(AndFilter.of(
                request.getConstraints().getSummary().entrySet().stream()
                        .map(entry -> ConstraintConverter.convert(entry.getKey(), entry.getValue()))
                        .collect(Collectors.toList()))));
    }

    Set<Split> splits = new HashSet<>();
    Optional<String> pageToken = Optional.empty();
    do {
        GetSlicesResponse page = metadataService.getSlices(
                authProvider.getAuthHeader(),
                GetSlicesRequest.builder()
                        .locator(locator)
                        .filter(filter)
                        .nextPageToken(pageToken)
                        .build());
        page.getSlices().stream()
                .map(slice -> slices.toSplit(spillLocationFactory.makeSpillLocation(), encryptionKey, slice))
                .forEach(splits::add);
        // An absent token means this was the last page.
        pageToken = page.getNextPageToken();
    } while (pageToken.isPresent());

    log.debug("finished planning splits. number of splits: {}", splits.size());
    return new GetSplitsResponse(request.getCatalogName(), splits);
}
Also used : CatalogLocator(com.palantir.foundry.athena.api.CatalogLocator) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Filter(com.palantir.foundry.athena.api.Filter) Logger(org.slf4j.Logger) CatalogLocator(com.palantir.foundry.athena.api.CatalogLocator) Split(com.amazonaws.athena.connector.lambda.domain.Split) LoggerFactory(org.slf4j.LoggerFactory) GetSlicesRequest(com.palantir.foundry.athena.api.GetSlicesRequest) Set(java.util.Set) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) GetSlicesResponse(com.palantir.foundry.athena.api.GetSlicesResponse) Collectors(java.util.stream.Collectors) AndFilter(com.palantir.foundry.athena.api.AndFilter) HashSet(java.util.HashSet) FoundryAthenaMetadataServiceBlocking(com.palantir.foundry.athena.api.FoundryAthenaMetadataServiceBlocking) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) Optional(java.util.Optional) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) GetSlicesResponse(com.palantir.foundry.athena.api.GetSlicesResponse) Filter(com.palantir.foundry.athena.api.Filter) AndFilter(com.palantir.foundry.athena.api.AndFilter) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) HashSet(java.util.HashSet)

Example 30 with GetSplitsRequest

Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in the project aws-athena-query-federation by awslabs.

From the class GetSplitsRequestSerDeTest, method beforeTest.

/**
 * Prepares the fixture for the serialization tests: the {@code expected} request and
 * the {@code expectedSerDeText} it is compared against.
 *
 * <p>The request is built from a seven-field schema (year, month, day, col2..col5),
 * constraints on col3, col4 and col5, and a partitions block with ten rows. The
 * reference text is read from the "GetSplitsRequest.json" resource under "serde/v2"
 * and trimmed.
 *
 * @throws IOException declared by the original signature
 */
@Before
public void beforeTest() throws IOException {
    String yearColumn = "year";
    String monthColumn = "month";
    String dayColumn = "day";

    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearColumn, new ArrowType.Int(32, true))
            .addField(monthColumn, new ArrowType.Int(32, true))
            .addField(dayColumn, new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", Types.MinorType.FLOAT8.getType())
            .addField("col4", Types.MinorType.FLOAT8.getType())
            .addField("col5", Types.MinorType.FLOAT8.getType())
            .build();

    // One constraint of each ValueSet flavour: range, equatable, all-or-none.
    Map<String, ValueSet> summary = new HashMap<>();
    summary.put("col3", SortedRangeSet.copyOf(
            Types.MinorType.FLOAT8.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)),
            false));
    summary.put("col4", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
            .add(1.1D)
            .build());
    summary.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
    Constraints constraints = new Constraints(summary);

    // Fill the three partition columns row by row, then fix the row count.
    Block partitions = allocator.createBlock(schema);
    int partitionCount = 10;
    for (int row = 0; row < partitionCount; row++) {
        BlockUtils.setValue(partitions.getFieldVector(yearColumn), row, 2016 + row);
        BlockUtils.setValue(partitions.getFieldVector(monthColumn), row, (row % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayColumn), row, (row % 28) + 1);
    }
    partitions.setRowCount(partitionCount);

    expected = new GetSplitsRequest(
            federatedIdentity,
            "test-query-id",
            "test-catalog",
            new TableName("test-schema", "test-table"),
            partitions,
            ImmutableList.of(yearColumn, monthColumn, dayColumn),
            constraints,
            "test-continuation-token");

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "GetSplitsRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Block(com.amazonaws.athena.connector.lambda.data.Block) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Before(org.junit.Before)

Aggregations

GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 46; Test (org.junit.Test): 41; GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 32; Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 29; TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 24; Schema (org.apache.arrow.vector.types.pojo.Schema): 24; GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest): 17; BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator): 16; BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl): 16; GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse): 16; HashMap (java.util.HashMap): 16; HashSet (java.util.HashSet): 15; MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 14; Map (java.util.Map): 14; Block (com.amazonaws.athena.connector.lambda.data.Block): 13; ResultSet (java.sql.ResultSet): 12; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 12; PreparedStatement (java.sql.PreparedStatement): 9; ArrayList (java.util.ArrayList): 9; Split (com.amazonaws.athena.connector.lambda.domain.Split): 8