use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DynamoDBMetadataHandlerTest method doGetTableLayoutQueryIndex.
/**
 * Requests a table layout with two equality values on col_4 and a closed range on col_5,
 * then checks that the resulting partitions describe a query against "test_index" with
 * col_4 as the hash key and col_5 as the range key.
 */
@Test
public void doGetTableLayoutQueryIndex() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    SortedRangeSet.Builder dateValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEDAY.getType(), false);
    SortedRangeSet.Builder timeValueSet = SortedRangeSet.newBuilder(Types.MinorType.DATEMILLI.getType(), false);

    LocalDateTime dateTime = LocalDateTime.of(2019, 9, 23, 11, 18, 37);
    // 26 hours later lands on the following calendar day, giving a second distinct day value.
    LocalDateTime dateTime2 = dateTime.plusHours(26);

    // Day counts are measured from the Unix epoch. Instant.MIN is the earliest representable
    // instant, not the epoch, and would yield day counts far outside the intended range.
    Instant epoch = Instant.EPOCH;
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime.toInstant(ZoneOffset.UTC))));
    dateValueSet.add(Range.equal(allocator, Types.MinorType.DATEDAY.getType(), ChronoUnit.DAYS.between(epoch, dateTime2.toInstant(ZoneOffset.UTC))));

    long startTime = dateTime.toInstant(ZoneOffset.UTC).toEpochMilli();
    long endTime = dateTime2.toInstant(ZoneOffset.UTC).toEpochMilli();
    timeValueSet.add(Range.range(allocator, Types.MinorType.DATEMILLI.getType(), startTime, true, endTime, true));

    constraintsMap.put("col_4", dateValueSet.build());
    constraintsMap.put("col_5", timeValueSet.build());

    // Collections.emptySet() keeps the partition-column argument type-safe (EMPTY_SET is a raw type).
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME, new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet()));

    logger.info("doGetTableLayout schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout partitions - {}", res.getPartitions());

    Map<String, String> metadata = res.getPartitions().getSchema().getCustomMetadata();
    assertThat(metadata.get(PARTITION_TYPE_METADATA), equalTo(QUERY_PARTITION_TYPE));
    assertThat(metadata.containsKey(INDEX_METADATA), is(true));
    assertThat(metadata.get(INDEX_METADATA), equalTo("test_index"));
    assertThat(metadata.get(HASH_KEY_NAME_METADATA), equalTo("col_4"));
    // One partition row is expected per distinct col_4 value.
    assertThat(res.getPartitions().getRowCount(), equalTo(2));
    assertThat(metadata.get(RANGE_KEY_NAME_METADATA), equalTo("col_5"));
    assertThat(metadata.get(RANGE_KEY_FILTER_METADATA), equalTo("(#col_5 >= :v0 AND #col_5 <= :v1)"));

    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col_4", "col_4", "#col_5", "col_5");
    assertThat(metadata.get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));

    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(startTime), ":v1", ItemUtils.toAttributeValue(endTime));
    assertThat(metadata.get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class ElasticsearchRecordHandlerTest method doReadRecordsNoSpill.
/**
 * Reads records with a single range predicate on "myshort" and spill thresholds large enough
 * that no spill is expected, then checks the projection and predicate captured from the
 * client call as well as the number of rows in the response.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");

    // Stub the mocked search response with two hits.
    SearchHit[] hits = {new SearchHit(1), new SearchHit(2)};
    when(mockResponse.getHits()).thenReturn(new SearchHits(hits, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 4));

    // Single constraint: myshort in (1955, 1972].
    Range myshortRange = Range.range(allocator, Types.MinorType.SMALLINT.getType(), (short) 1955, false, (short) 1972, true);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("myshort", SortedRangeSet.copyOf(Types.MinorType.SMALLINT.getType(), ImmutableList.of(myshortRange), false));

    // Every field of the mapping is expected in the projection, in mapping order.
    List<String> expectedProjection = new ArrayList<>();
    mapping.getFields().stream().map(field -> field.getName()).forEach(expectedProjection::add);
    String expectedPredicate = "(_exists_:myshort) AND myshort:({1955 TO 1972])";

    // 100GB for both limits - don't expect this to spill.
    long spillLimit = 100_000_000_000L;
    ReadRecordsRequest request = new ReadRecordsRequest(
            fakeIdentity(),
            "elasticsearch",
            "queryId-" + System.currentTimeMillis(),
            new TableName("movies", "mishmash"),
            mapping,
            split,
            new Constraints(constraintsMap),
            spillLimit,
            spillLimit);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);

    // The SearchRequest handed to client.getDocuments() carries the projection and predicate.
    ArgumentCaptor<SearchRequest> requestCaptor = ArgumentCaptor.forClass(SearchRequest.class);
    verify(mockClient).getDocuments(requestCaptor.capture());
    SearchRequest capturedRequest = requestCaptor.getValue();

    List<String> actualProjection = ImmutableList.copyOf(capturedRequest.source().fetchSource().includes());
    assertEquals("Projections do not match", expectedProjection, actualProjection);

    String actualPredicate = capturedRequest.source().query().queryName();
    assertEquals("Predicates do not match", expectedPredicate, actualPredicate);

    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertEquals(2, response.getRecords().getRowCount());

    int row = 0;
    while (row < response.getRecords().getRowCount()) {
        logger.info("doReadRecordsNoSpill - Row: {}, {}", row, BlockUtils.rowToString(response.getRecords(), row));
        ++row;
    }

    logger.info("doReadRecordsNoSpill: exit");
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtils method getBestIndexForPredicates.
/**
 * Chooses the index to use for the given predicates, falling back to the table itself.
 * <p>
 * The table is treated as one more index (projecting all attributes). Preference order:
 * the first hash-key match that is also a range-key match, then the first hash-key match,
 * then the table.
 *
 * @param table the original table
 * @param requestedCols the requested columns; a secondary index is only considered when
 *                      {@code indexContainsAllRequiredColumns} accepts it for these columns
 * @param predicates the predicates, keyed by column name
 * @return the optimal index if found, otherwise the original table index
 */
public static DynamoDBIndex getBestIndexForPredicates(DynamoDBTable table, List<String> requestedCols, Map<String, ValueSet> predicates) {
    Set<String> predicateColumns = predicates.keySet();

    // Model the table as a special index so it can compete with the secondary indices.
    DynamoDBIndex tableAsIndex = new DynamoDBIndex(table.getName(), table.getHashKey(), table.getRangeKey(), ProjectionType.ALL, ImmutableList.of());

    // Hash-key matches: the table goes first when its hash key has a predicate.
    ImmutableList.Builder<DynamoDBIndex> hashMatchCollector = ImmutableList.builder();
    if (predicateColumns.contains(tableAsIndex.getHashKey())) {
        hashMatchCollector.add(tableAsIndex);
    }

    // Secondary indices are only eligible when they contain all requested columns.
    List<DynamoDBIndex> eligibleIndices = table.getIndexes().stream()
            .filter(index -> indexContainsAllRequiredColumns(requestedCols, index, table))
            .collect(Collectors.toList());

    // A secondary index is a hash-key match only if its predicate yields hash key values.
    for (DynamoDBIndex index : eligibleIndices) {
        if (predicateColumns.contains(index.getHashKey())
                && !getHashKeyAttributeValues(predicates.get(index.getHashKey())).isEmpty()) {
            hashMatchCollector.add(index);
        }
    }
    List<DynamoDBIndex> hashMatches = hashMatchCollector.build();

    // Range-key matches: again the table first, then the eligible secondary indices.
    ImmutableList.Builder<DynamoDBIndex> rangeMatchCollector = ImmutableList.builder();
    if (tableAsIndex.getRangeKey().filter(predicateColumns::contains).isPresent()) {
        rangeMatchCollector.add(tableAsIndex);
    }
    for (DynamoDBIndex index : eligibleIndices) {
        if (index.getRangeKey().filter(predicateColumns::contains).isPresent()) {
            rangeMatchCollector.add(index);
        }
    }
    List<DynamoDBIndex> rangeMatches = rangeMatchCollector.build();

    // Prefer an index usable for both keys; otherwise the first hash-key match; otherwise the table.
    return hashMatches.stream()
            .filter(rangeMatches::contains)
            .findFirst()
            .orElseGet(() -> hashMatches.isEmpty() ? tableAsIndex : hashMatches.get(0));
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithNonEqualityPredicate.
/**
 * Checks index selection when the only predicate is on col0, the hash key of a GSI:
 * a range (non-equality) predicate is expected to fall back to the table, while a
 * single-value predicate is expected to select the GSI.
 */
@Test
public void testGetBestIndexForPredicatesWithNonEqualityPredicate() {
    // One allocator for both value sets, closed when the test finishes so it is not leaked.
    try (BlockAllocatorImpl allocator = new BlockAllocatorImpl()) {
        // non-equality conditions for the hash key
        ValueSet rangeValueSet = SortedRangeSet.of(Range.range(allocator, VARCHAR.getType(), "aaa", true, "bbb", false));
        ValueSet singleValueSet = SortedRangeSet.of(Range.equal(allocator, VARCHAR.getType(), "value"));

        DynamoDBTable table = new DynamoDBTable(
                "tableName",
                "hashKey",
                Optional.of("sortKey"),
                ImmutableList.of(
                        new AttributeDefinition("hashKey", "S"),
                        new AttributeDefinition("sortKey", "S"),
                        new AttributeDefinition("col0", "S")),
                ImmutableList.of(
                        new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.KEYS_ONLY, ImmutableList.of())),
                1000, 10, 5);

        assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0"), ImmutableMap.of("col0", rangeValueSet)).getName());
        assertEquals("col0-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0"), ImmutableMap.of("col0", singleValueSet)).getName());
    }
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtilsTest method testGetBestIndexForPredicatesWithMultipleIndices.
/**
 * Checks index selection on a table with two GSIs (col0-gsi, col1-gsi) and one LSI (col2-lsi)
 * for different combinations of requested columns and single-value predicates.
 */
@Test
public void testGetBestIndexForPredicatesWithMultipleIndices() {
    // The allocator is closed when the test finishes so it is not leaked.
    try (BlockAllocatorImpl allocator = new BlockAllocatorImpl()) {
        // multiple indices
        ValueSet singleValueSet = SortedRangeSet.of(Range.equal(allocator, VARCHAR.getType(), "value"));

        DynamoDBTable table = new DynamoDBTable(
                "tableName",
                "hashKey",
                Optional.of("sortKey"),
                ImmutableList.of(
                        new AttributeDefinition("hashKey", "S"),
                        new AttributeDefinition("sortKey", "S"),
                        new AttributeDefinition("col0", "S"),
                        new AttributeDefinition("col1", "S")),
                ImmutableList.of(
                        new DynamoDBIndex("col0-gsi", "col0", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col1")),
                        new DynamoDBIndex("col1-gsi", "col1", Optional.empty(), ProjectionType.INCLUDE, ImmutableList.of("col2")),
                        new DynamoDBIndex("col2-lsi", "hashKey", Optional.of("col2"), ProjectionType.ALL, ImmutableList.of())),
                1000, 10, 5);

        // Requesting all four columns: the table is expected whichever single column has the predicate.
        ImmutableList<String> allColumns = ImmutableList.of("hashKey", "col0", "col1", "col2");
        assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("hashKey", singleValueSet)).getName());
        assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col0", singleValueSet)).getName());
        assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col1", singleValueSet)).getName());
        assertEquals("tableName", DDBPredicateUtils.getBestIndexForPredicates(table, allColumns, ImmutableMap.of("col2", singleValueSet)).getName());

        // Narrower column requests: the matching secondary index is expected.
        assertEquals("col0-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0", "col1"), ImmutableMap.of("col0", singleValueSet)).getName());
        assertEquals("col1-gsi", DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col1", "col2"), ImmutableMap.of("col1", singleValueSet)).getName());
        assertEquals("col2-lsi", DDBPredicateUtils.getBestIndexForPredicates(table, ImmutableList.of("hashKey", "col0", "col1"), ImmutableMap.of("hashKey", singleValueSet, "col2", singleValueSet)).getName());
    }
}
Aggregations