Use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.
From the class FoundryRecordHandler, method doReadRecords:
@Override
@SuppressWarnings("MustBeClosedChecker")
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    log.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    log.debug("Reading records with constraints: {}", request.getConstraints());
    SpillConfig spillConfig = getSpillConfig(request);
    S3Spiller spiller = new S3Spiller(amazonS3, spillConfig, allocator);
    List<String> columnNames = request.getSchema().getFields().stream()
            .map(Field::getName)
            .collect(Collectors.toList());

    // create a temporary block to obtain a handle to the BufferAllocator and allocator id
    BufferAllocator bufferAllocator;
    String allocatorId;
    try (Block block = allocator.createBlock(request.getSchema())) {
        bufferAllocator = block.getFieldVectors().get(0).getAllocator();
        allocatorId = block.getAllocatorId();
    }

    throttlingInvoker.setBlockSpiller(spiller);
    try (QueryStatusChecker queryStatusChecker =
                    new QueryStatusChecker(athena, athenaInvoker, request.getQueryId());
            InputStream is = throttlingInvoker.invoke(() -> recordService.fetchSlice(
                    foundryAuthProvider.getAuthHeader(),
                    FetchSliceRequest.builder()
                            .slice(Slices.INSTANCE.fromSplit(request.getSplit()))
                            .columnNames(columnNames)
                            .maxBatchSize(SafeLong.of(spillConfig.getMaxBlockBytes()))
                            .build()))) {

        // we do not auto-close the reader to avoid releasing the buffers before serialization
        // in the case the block is held in memory
        PeekableArrowStreamReader reader = new PeekableArrowStreamReader(is, bufferAllocator);
        VectorSchemaRoot vectorSchemaRoot = reader.getVectorSchemaRoot();
        Block block = new Block(allocatorId, request.getSchema(), vectorSchemaRoot);
        reader.loadNextBatch();

        // spill if we have more blocks to read or the current block is too large to return
        if (reader.hasNextBatch() || block.getSize() > spillConfig.getMaxInlineBlockSize()) {
            do {
                spiller.spillBlock(block);
            } while (queryStatusChecker.isQueryRunning() && reader.loadNextBatch());

            // we have spilled so we can clean up the reader
            reader.close();
            return new RemoteReadRecordsResponse(
                    request.getCatalogName(),
                    request.getSchema(),
                    spiller.getSpillLocations(),
                    spillConfig.getEncryptionKey());
        } else {
            // no more batches so immediately return the block
            return new ReadRecordsResponse(request.getCatalogName(), block);
        }
    }
}
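The handler above returns a RemoteReadRecordsResponse when batches were spilled to S3 and a plain ReadRecordsResponse when a single block fits inline. The following consumer-side sketch shows how the two cases might be handled; it assumes a configured spillReader like the one used in the tests below, and the inline branch (including the getRecords() accessor) is an illustrative assumption rather than code from this project:

RecordResponse rawResponse = handler.doReadRecords(allocator, request);
if (rawResponse instanceof RemoteReadRecordsResponse) {
    // spilled path: each block must be fetched back from S3 via its SpillLocation
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        for (SpillLocation next : response.getRemoteBlocks()) {
            try (Block block = spillReader.read((S3SpillLocation) next,
                    response.getEncryptionKey(), response.getSchema())) {
                // consume block.getRowCount() rows here
            }
        }
    }
} else {
    // inline path: the single block travels inside the Lambda response payload
    Block inline = ((ReadRecordsResponse) rawResponse).getRecords();
}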
Use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class ExampleRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    for (int i = 0; i < 2; i++) {
        EncryptionKey encryptionKey = (i % 2 == 0) ? keyFactory.create() : null;
        logger.info("doReadRecordsSpill: Using encryptionKey[" + encryptionKey + "]");

        Map<String, ValueSet> constraintsMap = new HashMap<>();
        constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(),
                ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
        constraintsMap.put("unknown", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
                .add(1.1D)
                .build());
        constraintsMap.put("unknown2", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));

        ReadRecordsRequest request = new ReadRecordsRequest(IdentityUtil.fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), encryptionKey)
                        .add("year", "10")
                        .add("month", "10")
                        .add("day", "10")
                        .build(),
                new Constraints(constraintsMap),
                // ~1.5MB so we should see some spill
                1_600_000L,
                1000L);
        ObjectMapperUtil.assertSerialization(request);

        RecordResponse rawResponse = recordService.readRecords(request);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

        try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
            logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
            assertTrue(response.getNumberBlocks() > 1);
            int blockNum = 0;
            for (SpillLocation next : response.getRemoteBlocks()) {
                S3SpillLocation spillLocation = (S3SpillLocation) next;
                try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                    logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                    // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                    logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                    assertNotNull(BlockUtils.rowToString(block, 0));
                }
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class HbaseRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(10_000);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)), true));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class RemoteReadRecordsResponseSerDeTest, method deserialize:
@Test
public void deserialize() throws IOException {
    logger.info("deserialize: enter");
    InputStream input = new ByteArrayInputStream(expectedSerDeText.getBytes());
    RemoteReadRecordsResponse actual = (RemoteReadRecordsResponse) mapper.readValue(input, FederationResponse.class);
    logger.info("deserialize: deserialized[{}]", actual);
    assertEquals(expected, actual);
    logger.info("deserialize: exit");
}
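For completeness, here is a small round-trip sketch (not part of the quoted test class; the test name is hypothetical) that re-serializes the expected response with the same mapper and checks that reading it back yields an equal RemoteReadRecordsResponse. It reuses the mapper, expected, and FederationResponse references shown above and assumes the mapper has the response SerDe registered, as in the deserialize test:

@Test
public void serializeRoundTrip() throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // write the expected response back out through the configured mapper
    mapper.writeValue(out, expected);
    // reading it again should produce an equal RemoteReadRecordsResponse
    RemoteReadRecordsResponse roundTripped = (RemoteReadRecordsResponse) mapper.readValue(
            new ByteArrayInputStream(out.toByteArray()), FederationResponse.class);
    assertEquals(expected, roundTripped);
}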
Use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class TimestreamRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    String expectedQuery = "SELECT measure_name, measure_value::double, az, time, hostname, region FROM \"my_schema\".\"my_table\" WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";
    QueryResult mockResult = makeMockQueryResult(schemaForRead, 100_000);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true)
            .add("us-east-1a")
            .add("us-east-1b")
            .build());

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create());

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}