Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.
Class DataLakeGen2MuxMetadataHandlerTest, method doGetSplits.
/**
 * Checks that a splits request whose catalog name is "fakedatabase" is passed on
 * exactly once to {@code dataLakeGen2MetadataHandler}, with the same allocator and
 * the same request instance.
 */
@Test
public void doGetSplits() {
    // Arrange: a mocked request that only answers the catalog-name lookup.
    final GetSplitsRequest splitsRequest = Mockito.mock(GetSplitsRequest.class);
    Mockito.when(splitsRequest.getCatalogName()).thenReturn("fakedatabase");

    // Act: route the request through the handler under test.
    this.jdbcMetadataHandler.doGetSplits(this.allocator, splitsRequest);

    // Assert: the delegate saw the call once, with identical arguments.
    Mockito.verify(this.dataLakeGen2MetadataHandler, Mockito.times(1))
            .doGetSplits(Mockito.eq(this.allocator), Mockito.eq(splitsRequest));
}
Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.
Class DynamoDBMetadataHandlerTest, method doGetSplitsQuery.
/**
 * Exercises paginated split generation for a query constrained on {@code col_0}.
 *
 * <p>2000 distinct string values are placed in the {@code col_0} constraint. The test
 * then expects two pages of splits: the first page holds {@code MAX_SPLITS_PER_REQUEST}
 * splits and a continuation token of {@code MAX_SPLITS_PER_REQUEST - 1}; the second
 * page, requested with that token, holds another {@code MAX_SPLITS_PER_REQUEST} splits
 * and no continuation token. Within each page every split must carry a distinct
 * {@code col_0} property.
 *
 * @throws Exception if layout or split generation fails
 */
@Test
public void doGetSplitsQuery() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    EquatableValueSet.Builder valueSet =
            EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false);
    for (int i = 0; i < 2000; i++) {
        valueSet.add("test_str_" + i);
    }
    constraintsMap.put("col_0", valueSet.build());

    // Typed emptySet() instead of the raw Collections.EMPTY_SET avoids an unchecked conversion.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(
            allocator,
            new GetTableLayoutRequest(
                    TEST_IDENTITY,
                    TEST_QUERY_ID,
                    TEST_CATALOG_NAME,
                    TEST_TABLE_NAME,
                    new Constraints(constraintsMap),
                    SchemaBuilder.newBuilder().build(),
                    Collections.emptySet()));

    GetSplitsRequest req = new GetSplitsRequest(
            TEST_IDENTITY,
            TEST_QUERY_ID,
            TEST_CATALOG_NAME,
            TEST_TABLE_NAME,
            layoutResponse.getPartitions(),
            ImmutableList.of("col_0"),
            new Constraints(new HashMap<>()),
            null);
    logger.info("doGetSplits: req[{}]", req);

    // First page.
    GetSplitsResponse response = handler.doGetSplits(allocator, req);
    assertThat(response.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));
    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    assertThat(continuationToken, equalTo(String.valueOf(MAX_SPLITS_PER_REQUEST - 1)));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));

    // Second page, resumed from the first page's token.
    response = handler.doGetSplits(allocator, new GetSplitsRequest(req, continuationToken));
    // Log the token of THIS response; the previous code re-logged the first page's token here.
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", response.getContinuationToken(), response.getSplits().size());
    assertThat(response.getContinuationToken(), equalTo(null));
    assertThat(response.getSplits().size(), equalTo(MAX_SPLITS_PER_REQUEST));
    assertThat(response.getSplits().stream().map(split -> split.getProperty("col_0")).distinct().count(), equalTo((long) MAX_SPLITS_PER_REQUEST));
}
Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.
Class DynamoDBMetadataHandlerTest, method doGetSplitsScan.
/**
 * Exercises split generation when no constraints and no partition columns are supplied.
 *
 * <p>The test expects a single split whose {@code SEGMENT_ID_PROPERTY} is {@code "0"}
 * and a response with no continuation token.
 *
 * @throws Exception if layout or split generation fails
 */
@Test
public void doGetSplitsScan() throws Exception {
    // Typed emptySet() instead of the raw Collections.EMPTY_SET avoids an unchecked conversion.
    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(
            allocator,
            new GetTableLayoutRequest(
                    TEST_IDENTITY,
                    TEST_QUERY_ID,
                    TEST_CATALOG_NAME,
                    TEST_TABLE_NAME,
                    new Constraints(ImmutableMap.of()),
                    SchemaBuilder.newBuilder().build(),
                    Collections.emptySet()));

    GetSplitsRequest req = new GetSplitsRequest(
            TEST_IDENTITY,
            TEST_QUERY_ID,
            TEST_CATALOG_NAME,
            TEST_TABLE_NAME,
            layoutResponse.getPartitions(),
            ImmutableList.of(),
            new Constraints(new HashMap<>()),
            null);
    logger.info("doGetSplits: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertThat(rawResponse.getRequestType(), equalTo(MetadataRequestType.GET_SPLITS));

    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    String continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    // Match on the value itself so a failure reports the unexpected token, not just "false".
    assertThat(continuationToken, equalTo(null));

    // getOnlyElement fails unless exactly one split was produced.
    Split split = Iterables.getOnlyElement(response.getSplits());
    assertThat(split.getProperty(SEGMENT_ID_PROPERTY), equalTo("0"));
    logger.info("doGetSplitsScan: exit");
}
Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project foundry-athena-query-federation-connector by palantir.
Class SplitsFetcher, method getSplits.
/**
 * Plans the splits for a request by paging through the metadata service's slices.
 *
 * <p>The catalog locator is read from the request schema's custom metadata. When the
 * request carries any constraints they are all converted and combined into one AND
 * filter; otherwise no filter is sent. Slices are fetched page by page until a page
 * arrives without a next-page token, and each slice is turned into a split.
 *
 * @param request the splits request supplying schema metadata, constraints and catalog name
 * @param spillLocationFactory produces one spill location per slice
 * @param encryptionKey the key passed along to every split
 * @return a response holding the catalog name and all collected splits
 */
GetSplitsResponse getSplits(GetSplitsRequest request, SpillLocationFactory spillLocationFactory, EncryptionKey encryptionKey) {
    CatalogLocator locator = FoundryAthenaObjectMapper.objectMapper()
            .convertValue(request.getSchema().getCustomMetadata(), CatalogLocator.class);

    // All constraints are pushed down as-is, which also covers any partition columns.
    Optional<Filter> filter = request.getConstraints().getSummary().isEmpty()
            ? Optional.empty()
            : Optional.of(Filter.and(AndFilter.of(
                    request.getConstraints().getSummary().entrySet().stream()
                            .map(entry -> ConstraintConverter.convert(entry.getKey(), entry.getValue()))
                            .collect(Collectors.toList()))));

    Set<Split> splits = new HashSet<>();
    Optional<String> pageToken = Optional.empty();
    do {
        GetSlicesResponse page = metadataService.getSlices(
                authProvider.getAuthHeader(),
                GetSlicesRequest.builder()
                        .locator(locator)
                        .filter(filter)
                        .nextPageToken(pageToken)
                        .build());

        Set<Split> pageSplits = page.getSlices().stream()
                .map(slice -> slices.toSplit(spillLocationFactory.makeSpillLocation(), encryptionKey, slice))
                .collect(Collectors.toSet());
        splits.addAll(pageSplits);

        // An absent token ends the loop; a present one drives the next fetch.
        pageToken = page.getNextPageToken();
    } while (pageToken.isPresent());

    log.debug("finished planning splits. number of splits: {}", splits.size());
    return new GetSplitsResponse(request.getCatalogName(), splits);
}
Use of com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest in project aws-athena-query-federation by awslabs.
Class GetSplitsRequestSerDeTest, method beforeTest.
/**
 * Builds the fixture for the serialization tests: the {@code expected} request and
 * the {@code expectedSerDeText} loaded from the {@code serde/v2/GetSplitsRequest.json}
 * resource.
 *
 * <p>The request uses a seven-column schema (three 32-bit signed integer partition columns,
 * one UTF-8 column and three FLOAT8 columns), one constraint per FLOAT8 column, and a
 * partitions block of ten rows.
 *
 * @throws IOException declared by the original fixture; presumably raised by resource loading
 */
@Before
public void beforeTest() throws IOException {
    final String yearCol = "year";
    final String monthCol = "month";
    final String dayCol = "day";
    final ArrowType float8 = Types.MinorType.FLOAT8.getType();

    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", float8)
            .addField("col4", float8)
            .addField("col5", float8)
            .build();

    // One constraint of each value-set flavour: range, equatable, all-or-none.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "col3",
            SortedRangeSet.copyOf(float8, ImmutableList.of(Range.greaterThan(allocator, float8, -10000D)), false));
    constraintsMap.put(
            "col4",
            EquatableValueSet.newBuilder(allocator, float8, false, true).add(1.1D).build());
    constraintsMap.put("col5", new AllOrNoneValueSet(float8, false, true));
    Constraints constraints = new Constraints(constraintsMap);

    // Populate only the three partition columns, one row per partition.
    Block partitions = allocator.createBlock(schema);
    final int partitionCount = 10;
    for (int row = 0; row < partitionCount; row++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), row, 2016 + row);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), row, (row % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), row, (row % 28) + 1);
    }
    partitions.setRowCount(partitionCount);

    expected = new GetSplitsRequest(
            federatedIdentity,
            "test-query-id",
            "test-catalog",
            new TableName("test-schema", "test-table"),
            partitions,
            ImmutableList.of(yearCol, monthCol, dayCol),
            constraints,
            "test-continuation-token");

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "GetSplitsRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Aggregations