Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.
From the class ElasticsearchRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    // Stub two pages of search results: the first fills an entire query batch,
    // the second carries two trailing hits. Each page is returned twice because
    // the handler calls getHits() more than once per response.
    int batchSize = handler.getQueryBatchSize();
    SearchHit[] firstPageHits = new SearchHit[batchSize];
    for (int docId = 1; docId <= batchSize; docId++) {
        firstPageHits[docId - 1] = new SearchHit(docId);
    }
    SearchHit[] secondPageHits = {new SearchHit(batchSize + 1), new SearchHit(batchSize + 2)};
    SearchHits firstPage = new SearchHits(firstPageHits, new TotalHits(batchSize, TotalHits.Relation.EQUAL_TO), 4);
    SearchHits secondPage = new SearchHits(secondPageHits, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 4);
    when(mockResponse.getHits()).thenReturn(firstPage, firstPage, secondPage, secondPage);

    // Push a range predicate on "myshort": (1955, 1972].
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("myshort", SortedRangeSet.copyOf(Types.MinorType.SMALLINT.getType(),
            ImmutableList.of(Range.range(allocator, Types.MinorType.SMALLINT.getType(),
                    (short) 1955, false, (short) 1972, true)),
            false));

    ReadRecordsRequest request = new ReadRecordsRequest(fakeIdentity(), "elasticsearch",
            "queryId-" + System.currentTimeMillis(), new TableName("movies", "mishmash"),
            mapping, split, new Constraints(constraintsMap),
            // 10KB Expect this to spill
            10_000L, 0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    // The tiny 10KB limit forces the handler to spill to remote blocks; read each
    // spilled block back and confirm it contains at least one row.
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertEquals(3, response.getNumberBlocks());

        int blockNum = 0;
        for (SpillLocation location : response.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            try (Block block = spillReader.read(s3Location, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.
From the class S3BlockSpiller, method makeSpillLocation:
/**
 * Generates the S3 location for the next spilled block. Must be thread safe and
 * must produce keys in the format:
 * location.0
 * location.1
 * location.2
 * <p>
 * The read engine may elect to exploit this naming convention to speed up the
 * pipelining of reads while the spiller is still writing. Violating this
 * convention may reduce performance or increase calls to S3.
 *
 * @return a non-directory S3SpillLocation under the split's spill directory.
 */
private S3SpillLocation makeSpillLocation() {
    S3SpillLocation parent = (S3SpillLocation) spillConfig.getSpillLocation();
    // Multiple blocks may be spilled for one split, so the split's location
    // must be a directory we can place numbered block keys under.
    if (!parent.isDirectory()) {
        throw new RuntimeException("Split's SpillLocation must be a directory because multiple blocks may be spilled.");
    }
    // Thread safety comes from the atomic counter: each caller gets a unique suffix.
    String blockKey = String.format("%s.%d", parent.getKey(), spillNumber.getAndIncrement());
    return new S3SpillLocation(parent.getBucket(), blockKey, false);
}
Use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in the project aws-athena-query-federation by awslabs.
From the class MetricsRecordHandlerTest, method readMetricsWithConstraint:
@Test
public void readMetricsWithConstraint() throws Exception {
    logger.info("readMetricsWithConstraint: enter");

    String namespace = "namespace";
    String dimName = "dimName";
    // FIX: was the typo "dimValye". The test still passed because both the stub
    // and the constraint read this variable, but the misspelling was a latent
    // trap for anyone asserting against the literal value.
    String dimValue = "dimValue";
    int numMetrics = 100;
    AtomicLong numCalls = new AtomicLong(0);

    // Stub ListMetrics to page exactly twice (nextToken "valid" on the first call,
    // null on the second). Each page interleaves matching and non-matching metrics
    // so the handler's constraint filtering is exercised.
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
        numCalls.incrementAndGet();
        // assert that the namespace filter was indeed pushed down
        assertEquals(namespace, request.getNamespace());
        String nextToken = (request.getNextToken() == null) ? "valid" : null;
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespace).withMetricName("metric-" + i)
                    .withDimensions(new Dimension().withName(dimName).withValue(dimValue)));
            // Different namespace: should be filtered out by the constraints below.
            metrics.add(new Metric().withNamespace(namespace + i).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
    });

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
    constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
    constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));

    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(spillLocation, keyFactory.create()).build();

    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog",
            "queryId-" + System.currentTimeMillis(), METRICS_TABLE_NAME, METRIC_TABLE.getSchema(),
            split, new Constraints(constraintsMap),
            100_000_000_000L, // 100GB don't expect this to spill
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readMetricsWithConstraint: rows[{}]", response.getRecordCount());
    // Only the matching metric per iteration survives filtering: numCalls pages * numMetrics rows.
    assertEquals(numCalls.get() * numMetrics, response.getRecords().getRowCount());
    logger.info("readMetricsWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));
    logger.info("readMetricsWithConstraint: exit");
}
Aggregations