Search in sources :

Example 46 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetTableLayoutQueryIndex.

/**
 * Requests a table layout constrained on {@code col_4} (two DATEDAY equality values) and
 * {@code col_5} (an inclusive DATEMILLI range) and asserts that the returned partitions describe a
 * query against {@code test_index}: {@code col_4} as hash key with one partition row per value, and
 * {@code col_5} as range key with the expected filter expression, expression names and values.
 *
 * @throws Exception if the handler call fails
 */
@Test
public void doGetTableLayoutQueryIndex() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    SortedRangeSet.Builder dateValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEDAY.getType(), false);
    SortedRangeSet.Builder timeValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEMILLI.getType(), false);
    LocalDateTime dateTime = LocalDateTime.of(2019, 9, 23, 11, 18, 37);
    // Day values are counted from the Unix epoch (1970-01-01T00:00:00Z). Instant.MIN is the minimum
    // representable instant, not the epoch, so it must not be used as the reference point here.
    Instant epoch = Instant.EPOCH;
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime.toInstant(ZoneOffset.UTC))));
    // 26 hours later lands on the following calendar day, giving a second, distinct hash key value.
    LocalDateTime dateTime2 = dateTime.plusHours(26);
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime2.toInstant(ZoneOffset.UTC))));
    long startTime = dateTime.toInstant(ZoneOffset.UTC).toEpochMilli();
    long endTime = dateTime2.toInstant(ZoneOffset.UTC).toEpochMilli();
    timeValueSet.add(Range.range(allocator, Types.MinorType.DATEMILLI.getType(), startTime, true, endTime, true));
    constraintsMap.put("col_4", dateValueSet.build());
    constraintsMap.put("col_5", timeValueSet.build());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));
    logger.info("doGetTableLayout schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout partitions - {}", res.getPartitions());
    Map<String, String> metadata = res.getPartitions().getSchema().getCustomMetadata();
    assertThat(metadata.get(PARTITION_TYPE_METADATA), equalTo(QUERY_PARTITION_TYPE));
    assertThat(metadata.containsKey(INDEX_METADATA), is(true));
    assertThat(metadata.get(INDEX_METADATA), equalTo("test_index"));
    assertThat(metadata.get(HASH_KEY_NAME_METADATA), equalTo("col_4"));
    // One partition row is expected per hash key value added to the col_4 predicate.
    assertThat(res.getPartitions().getRowCount(), equalTo(2));
    assertThat(metadata.get(RANGE_KEY_NAME_METADATA), equalTo("col_5"));
    assertThat(metadata.get(RANGE_KEY_FILTER_METADATA), equalTo("(#col_5 >= :v0 AND #col_5 <= :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col_4", "col_4", "#col_5", "col_5");
    assertThat(metadata.get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(startTime), ":v1", ItemUtils.toAttributeValue(endTime));
    assertThat(metadata.get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
Also used : LocalDateTime(java.time.LocalDateTime) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) HashMap(java.util.HashMap) Instant(java.time.Instant) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 47 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class ElasticsearchRecordHandlerTest method doReadRecordsNoSpill.

/**
 * Reads records through the handler with a SMALLINT range constraint on {@code myshort} and a spill
 * threshold far above the data size, then checks the projection and predicate captured from the
 * search request sent to the mocked client, and that two rows come back in a
 * {@link ReadRecordsResponse}.
 *
 * @throws Exception if the handler call fails
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");

    // Stub the mocked search response so that it reports exactly two hits.
    SearchHit[] hits = {new SearchHit(1), new SearchHit(2)};
    SearchHits stubbedHits = new SearchHits(hits, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 4);
    when(mockResponse.getHits()).thenReturn(stubbedHits);

    // Constraint: 1955 (exclusive) to 1972 (inclusive) on the "myshort" column.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    Range yearRange = Range.range(allocator, Types.MinorType.SMALLINT.getType(), (short) 1955, false, (short) 1972, true);
    constraintsMap.put("myshort", SortedRangeSet.copyOf(Types.MinorType.SMALLINT.getType(), ImmutableList.of(yearRange), false));

    List<String> expectedProjection = new ArrayList<>();
    mapping.getFields().forEach(field -> expectedProjection.add(field.getName()));
    String expectedPredicate = "(_exists_:myshort) AND myshort:({1955 TO 1972])";

    // 100GB don't expect this to spill
    long noSpillThreshold = 100_000_000_000L;
    ReadRecordsRequest request = new ReadRecordsRequest(
            fakeIdentity(),
            "elasticsearch",
            "queryId-" + System.currentTimeMillis(),
            new TableName("movies", "mishmash"),
            mapping,
            split,
            new Constraints(constraintsMap),
            noSpillThreshold,
            noSpillThreshold);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);

    // The SearchRequest handed to client.getDocuments() carries the projection and the predicate,
    // so capture it for inspection.
    ArgumentCaptor<SearchRequest> requestCaptor = ArgumentCaptor.forClass(SearchRequest.class);
    verify(mockClient).getDocuments(requestCaptor.capture());
    SearchRequest searchRequest = requestCaptor.getValue();

    // Projection actually requested versus the one derived from the mapping.
    List<String> actualProjection = ImmutableList.copyOf(searchRequest.source().fetchSource().includes());
    assertEquals("Projections do not match", expectedProjection, actualProjection);

    // Predicate actually requested versus the expected query string.
    String actualPredicate = searchRequest.source().query().queryName();
    assertEquals("Predicates do not match", expectedPredicate, actualPredicate);

    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertEquals(2, response.getRecords().getRowCount());

    int row = 0;
    while (row < response.getRecords().getRowCount()) {
        logger.info("doReadRecordsNoSpill - Row: {}, {}", row, BlockUtils.rowToString(response.getRecords(), row));
        ++row;
    }
    logger.info("doReadRecordsNoSpill: exit");
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Mockito.anyString(org.mockito.Mockito.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SearchHits(org.elasticsearch.search.SearchHits) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 48 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtils method getBestIndexForPredicates.

/**
 * Selects the index to use for the given predicates, falling back to the original table (modelled
 * as an index) when no index has a hash key predicate.
 *
 * <p>Selection order: the first hash-key match that is also a range-key match; otherwise the first
 * hash-key match; otherwise the table itself. The table, when it matches, is always considered
 * before any of its indices.
 *
 * @param table the original table
 * @param requestedCols the requested columns; an index is only considered when
 *     {@code indexContainsAllRequiredColumns} accepts it for these columns
 * @param predicates the predicates, keyed by column name
 * @return the optimal index if found, otherwise the original table index
 */
public static DynamoDBIndex getBestIndexForPredicates(DynamoDBTable table, List<String> requestedCols, Map<String, ValueSet> predicates) {
    Set<String> predicateColumns = predicates.keySet();

    // Treat the table itself as a special index so it can be ranked alongside the real ones.
    DynamoDBIndex tableIndex = new DynamoDBIndex(table.getName(), table.getHashKey(), table.getRangeKey(), ProjectionType.ALL, ImmutableList.of());

    // Hash key candidates: the table goes first when its hash key has a predicate.
    List<DynamoDBIndex> hashKeyMatches = new ArrayList<>();
    if (predicateColumns.contains(tableIndex.getHashKey())) {
        hashKeyMatches.add(tableIndex);
    }

    // An index is only usable when every requested column is projected into it.
    List<DynamoDBIndex> candidateIndices = new ArrayList<>();
    for (DynamoDBIndex index : table.getIndexes()) {
        if (indexContainsAllRequiredColumns(requestedCols, index, table)) {
            candidateIndices.add(index);
        }
    }

    // Indices whose hash key has a predicate that yields at least one hash key attribute value.
    for (DynamoDBIndex index : candidateIndices) {
        String hashKey = index.getHashKey();
        if (predicateColumns.contains(hashKey) && !getHashKeyAttributeValues(predicates.get(hashKey)).isEmpty()) {
            hashKeyMatches.add(index);
        }
    }

    // Range key candidates: again the table first, then the eligible indices.
    List<DynamoDBIndex> rangeKeyMatches = new ArrayList<>();
    if (tableIndex.getRangeKey().filter(predicateColumns::contains).isPresent()) {
        rangeKeyMatches.add(tableIndex);
    }
    for (DynamoDBIndex index : candidateIndices) {
        if (index.getRangeKey().filter(predicateColumns::contains).isPresent()) {
            rangeKeyMatches.add(index);
        }
    }

    // Best case: both the hash key and the range key can be specified with predicates.
    for (DynamoDBIndex hashMatch : hashKeyMatches) {
        if (rangeKeyMatches.contains(hashMatch)) {
            return hashMatch;
        }
    }

    // Otherwise settle for the first hash key match, or the table when there is none.
    if (hashKeyMatches.isEmpty()) {
        return tableIndex;
    }
    return hashKeyMatches.get(0);
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Set(java.util.Set) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Preconditions.checkState(com.google.common.base.Preconditions.checkState) HashSet(java.util.HashSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) List(java.util.List) Stream(java.util.stream.Stream) ImmutableList(com.google.common.collect.ImmutableList) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) ProjectionType(com.amazonaws.services.dynamodbv2.model.ProjectionType) Joiner(com.google.common.base.Joiner) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex)

Example 49 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithNonEqualityPredicate.

/**
 * Asserts that a range (non-equality) predicate on {@code col0} resolves to the table itself, while
 * an equality predicate on the same column resolves to {@code col0-gsi}.
 */
@Test
public void testGetBestIndexForPredicatesWithNonEqualityPredicate() {
    // A half-open range ["aaa", "bbb") and a single-value predicate, both over VARCHAR.
    ValueSet rangePredicate = SortedRangeSet.of(Range.range(new BlockAllocatorImpl(), VARCHAR.getType(), "aaa", true, "bbb", false));
    ValueSet equalityPredicate = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));

    ImmutableList<AttributeDefinition> attributes = ImmutableList.of(
            new AttributeDefinition("hashKey", "S"),
            new AttributeDefinition("sortKey", "S"),
            new AttributeDefinition("col0", "S"));
    ImmutableList<DynamoDBIndex> indices = ImmutableList.of(
            new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.KEYS_ONLY, ImmutableList.of()));
    DynamoDBTable table = new DynamoDBTable("tableName", "hashKey", Optional.of("sortKey"), attributes, indices, 1000, 10, 5);

    DynamoDBIndex chosenForRange = DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0"), ImmutableMap.of("col0", rangePredicate));
    assertEquals("tableName", chosenForRange.getName());

    DynamoDBIndex chosenForEquality = DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0"), ImmutableMap.of("col0", equalityPredicate));
    assertEquals("col0-gsi", chosenForEquality.getName());
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) Test(org.junit.Test)

Example 50 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithMultipleIndices.

/**
 * Asserts which index is chosen for a table with two GSIs ({@code col0-gsi}, {@code col1-gsi}) and
 * one LSI ({@code col2-lsi}) under various combinations of requested columns and equality
 * predicates.
 */
@Test
public void testGetBestIndexForPredicatesWithMultipleIndices() {
    ValueSet equalityPredicate = SortedRangeSet.of(Range.equal(new BlockAllocatorImpl(), VARCHAR.getType(), "value"));

    ImmutableList<AttributeDefinition> attributes = ImmutableList.of(
            new AttributeDefinition("hashKey", "S"),
            new AttributeDefinition("sortKey", "S"),
            new AttributeDefinition("col0", "S"),
            new AttributeDefinition("col1", "S"));
    ImmutableList<DynamoDBIndex> indices = ImmutableList.of(
            new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col1")),
            new DynamoDBIndex("col1-gsi", "col1", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col2")),
            new DynamoDBIndex("col2-lsi", "hashKey", Optional.of("col2"), ProjectionType.ALL, ImmutableList.of()));
    DynamoDBTable table = new DynamoDBTable("tableName", "hashKey", Optional.of("sortKey"), attributes, indices, 1000, 10, 5);

    // With all four columns requested, every single-column predicate is expected to resolve to the table.
    ImmutableList<String> allColumns = ImmutableList.of("hashKey", "col0", "col1", "col2");
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("hashKey", equalityPredicate)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col0", equalityPredicate)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col1", equalityPredicate)).getName());
    assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col2", equalityPredicate)).getName());

    // With narrower column sets, the GSI keyed on the predicate column is expected.
    ImmutableList<String> withoutCol2 = ImmutableList.of("hashKey", "col0", "col1");
    ImmutableList<String> withoutCol0 = ImmutableList.of("hashKey", "col1", "col2");
    assertEquals("col0-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, withoutCol2, ImmutableMap.of("col0", equalityPredicate)).getName());
    assertEquals("col1-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, withoutCol0, ImmutableMap.of("col1", equalityPredicate)).getName());

    // Predicates on both the hash key and col2 are expected to resolve to the LSI.
    assertEquals("col2-lsi", DDBPredicateUtils.getBestIndexForPredicates(table, withoutCol2, ImmutableMap.of("hashKey", equalityPredicate, "col2", equalityPredicate)).getName());
}
Also used : BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) Test(org.junit.Test)

Aggregations

ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)104 Test (org.junit.Test)66 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)63 HashMap (java.util.HashMap)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)47 Schema (org.apache.arrow.vector.types.pojo.Schema)37 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)27 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)26 ArrayList (java.util.ArrayList)25 Matchers.anyString (org.mockito.Matchers.anyString)25 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)24 Block (com.amazonaws.athena.connector.lambda.data.Block)23 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)21 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)18 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)17 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)17 InvocationOnMock (org.mockito.invocation.InvocationOnMock)17 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)13