Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class DynamoDBRecordHandlerTest, method testReadQuerySplit:
@Test
public void testReadQuerySplit() throws Exception
{
    Map<String, String> expressionNames = ImmutableMap.of("#col_1", "col_1");
    Map<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", toAttributeValue(1));
    Split split = Split.newBuilder(SPILL_LOCATION, keyFactory.create())
            .add(TABLE_METADATA, TEST_TABLE)
            .add(HASH_KEY_NAME_METADATA, "col_0")
            .add("col_0", toJsonString(toAttributeValue("test_str_0")))
            .add(RANGE_KEY_FILTER_METADATA, "#col_1 >= :v0")
            .add(EXPRESSION_NAMES_METADATA, toJsonString(expressionNames))
            .add(EXPRESSION_VALUES_METADATA, toJsonString(expressionValues))
            .build();
    ReadRecordsRequest request = new ReadRecordsRequest(
            TEST_IDENTITY,
            TEST_CATALOG_NAME,
            TEST_QUERY_ID,
            TEST_TABLE_NAME,
            schema,
            split,
            new Constraints(ImmutableMap.of()),
            // too big to spill
            100_000_000_000L,
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("testReadQuerySplit: rows[{}]", response.getRecordCount());
    assertEquals(2, response.getRecords().getRowCount());
    logger.info("testReadQuerySplit: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
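For orientation, the split metadata above corresponds roughly to the DynamoDB query sketched below (a minimal sketch assuming the AWS SDK v1 model classes; the table name test_table and the helper queryForTestSplit are hypothetical stand-ins, and the actual expression assembly inside DynamoDBRecordHandler may differ):

import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.QueryRequest;
import com.google.common.collect.ImmutableMap;

class SplitQuerySketch
{
    // Sketch: the query the connector would effectively issue for this split.
    // "#col_1" is aliased via expression attribute names; ":v0" carries the
    // range-key filter value taken from the split metadata.
    static QueryRequest queryForTestSplit()
    {
        return new QueryRequest()
                .withTableName("test_table") // hypothetical; stands in for TEST_TABLE
                .withKeyConditionExpression("col_0 = :hashKey AND #col_1 >= :v0")
                .withExpressionAttributeNames(ImmutableMap.of("#col_1", "col_1"))
                .withExpressionAttributeValues(ImmutableMap.of(
                        ":hashKey", new AttributeValue().withS("test_str_0"),
                        ":v0", new AttributeValue().withN("1")));
    }
}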
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class ConnectorValidator, method readRecords:
private static ReadRecordsResponse readRecords(TestConfig testConfig, TableName table, Schema schema, Collection<Split> splits)
{
    Constraints constraints = parseConstraints(schema, testConfig.getConstraints());
    Split split = getRandomElement(splits);
    log.info("Executing randomly selected split with properties: {}", split.getProperties());
    ReadRecordsResponse records = LambdaRecordProvider.readRecords(
            testConfig.getCatalogId(),
            table,
            constraints,
            schema,
            split,
            testConfig.getRecordFunction(),
            testConfig.getIdentity());
    log.info("Received " + records.getRecordCount() + " records.");
    checkState(records.getRecordCount() > 0,
            "Table " + toQualifiedTableName(table)
                    + " did not return any rows in the tested split, even though an empty constraint was used."
                    + " This can happen if the table is empty but could also indicate an issue."
                    + " Please populate the table or specify a different table.");
    log.info("Discovered columns: "
            + records.getSchema().getFields().stream()
                    .map(f -> f.getName() + ":" + f.getType().getTypeID())
                    .collect(Collectors.toList()));
    if (records.getRecordCount() == 0) {
        // unreachable after the checkState above; kept as a defensive guard
        return records;
    }
    log.info("First row of split: " + rowToString(records.getRecords(), 0));
    return records;
}
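The getRandomElement helper is not shown on this page; a minimal sketch of what such a helper might look like (hypothetical; the actual implementation in ConnectorValidator may differ):

import java.util.Collection;
import java.util.concurrent.ThreadLocalRandom;

// Hypothetical helper: returns a uniformly random element of a non-empty collection.
private static <T> T getRandomElement(Collection<T> elements)
{
    int target = ThreadLocalRandom.current().nextInt(elements.size());
    int i = 0;
    for (T element : elements) {
        if (i++ == target) {
            return element;
        }
    }
    throw new IllegalArgumentException("Collection must not be empty");
}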
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.
From the class FoundryRecordHandlerTest, method testReadRecordsSingle:
@Test
void testReadRecordsSingle() throws Exception
{
    when(readRecordsRequest.getCatalogName()).thenReturn("catalog");
    List<List<Object>> data = ImmutableList.of(ImmutableList.of(1, 2), ImmutableList.of(1.1, 2.2));
    InputStream stream = ArrowUtils.writeToStream(
            TestConstants.SCHEMA,
            ImmutableList.of(ArrowUtils.createBatch(TestConstants.SCHEMA, data)));
    when(recordService.fetchSlice(any(), any())).thenReturn(stream);
    RecordResponse response = handler.doReadRecords(allocator, readRecordsRequest);
    assertRoundTripSerializable(response);
    assertThat(response).isInstanceOf(ReadRecordsResponse.class);
    ReadRecordsResponse readRecordsResponse = (ReadRecordsResponse) response;
    assertThat(TestUtils.readBlockDataAsColumns(TestConstants.SCHEMA, readRecordsResponse.getRecords()))
            .containsExactlyElementsOf(data);
    assertThat(s3).isEmpty();
}
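The assertRoundTripSerializable helper is defined elsewhere in the test class; a minimal sketch of what such a check could look like, assuming the Athena SDK's VersionedObjectMapperFactory (the Foundry project's actual helper may differ):

import com.amazonaws.athena.connector.lambda.request.FederationResponse;
import com.amazonaws.athena.connector.lambda.serde.VersionedObjectMapperFactory;
import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical sketch: serialize the response to JSON and read it back,
// failing the test if either direction throws or the type is lost.
private void assertRoundTripSerializable(RecordResponse response) throws Exception
{
    ObjectMapper mapper = VersionedObjectMapperFactory.create(allocator);
    byte[] json = mapper.writeValueAsBytes(response);
    FederationResponse roundTripped = mapper.readValue(json, FederationResponse.class);
    assertThat(roundTripped).isInstanceOf(response.getClass());
}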
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.
From the class FoundryRecordHandler, method doReadRecords:
@Override
@SuppressWarnings("MustBeClosedChecker")
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception
{
    log.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    log.debug("Reading records with constraints: {}", request.getConstraints());
    SpillConfig spillConfig = getSpillConfig(request);
    S3Spiller spiller = new S3Spiller(amazonS3, spillConfig, allocator);
    List<String> columnNames = request.getSchema().getFields().stream()
            .map(Field::getName)
            .collect(Collectors.toList());
    // create a temporary block to obtain a handle to the BufferAllocator and allocator id
    BufferAllocator bufferAllocator;
    String allocatorId;
    try (Block block = allocator.createBlock(request.getSchema())) {
        bufferAllocator = block.getFieldVectors().get(0).getAllocator();
        allocatorId = block.getAllocatorId();
    }
    throttlingInvoker.setBlockSpiller(spiller);
    try (QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId());
            InputStream is = throttlingInvoker.invoke(() -> recordService.fetchSlice(
                    foundryAuthProvider.getAuthHeader(),
                    FetchSliceRequest.builder()
                            .slice(Slices.INSTANCE.fromSplit(request.getSplit()))
                            .columnNames(columnNames)
                            .maxBatchSize(SafeLong.of(spillConfig.getMaxBlockBytes()))
                            .build()))) {
        // we do not auto-close the reader to avoid releasing the buffers before
        // serialization in the case the block is held in memory
        PeekableArrowStreamReader reader = new PeekableArrowStreamReader(is, bufferAllocator);
        VectorSchemaRoot vectorSchemaRoot = reader.getVectorSchemaRoot();
        Block block = new Block(allocatorId, request.getSchema(), vectorSchemaRoot);
        reader.loadNextBatch();
        // spill if we have more blocks to read or the current block is too large to return inline
        if (reader.hasNextBatch() || block.getSize() > spillConfig.getMaxInlineBlockSize()) {
            do {
                spiller.spillBlock(block);
            }
            while (queryStatusChecker.isQueryRunning() && reader.loadNextBatch());
            // we have spilled, so we can clean up the reader
            reader.close();
            return new RemoteReadRecordsResponse(
                    request.getCatalogName(),
                    request.getSchema(),
                    spiller.getSpillLocations(),
                    spillConfig.getEncryptionKey());
        }
        else {
            // no more batches, so immediately return the block inline
            return new ReadRecordsResponse(request.getCatalogName(), block);
        }
    }
}
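Callers of doReadRecords must handle both return types; a minimal sketch of the dispatch (hypothetical caller code, not part of the connector):

// Hypothetical consumer: inline responses carry the block directly, while
// remote responses carry only S3 spill locations plus the encryption key.
RecordResponse response = handler.doReadRecords(allocator, request);
if (response instanceof RemoteReadRecordsResponse) {
    RemoteReadRecordsResponse remote = (RemoteReadRecordsResponse) response;
    log.info("Spilled {} blocks to S3", remote.getRemoteBlocks().size());
}
else {
    ReadRecordsResponse inline = (ReadRecordsResponse) response;
    log.info("Returned {} rows inline", inline.getRecordCount());
}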
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
From the class ReadRecordsResponseSerDeTest, method beforeTest:
@Before
public void beforeTest() throws IOException
{
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .build();
    Block records = allocator.createBlock(schema);
    int numRecords = 10;
    for (int i = 0; i < numRecords; i++) {
        BlockUtils.setValue(records.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(records.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(records.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    records.setRowCount(numRecords);
    expected = new ReadRecordsResponse("test-catalog", records);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
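The fixture built here is exercised by the test methods that follow (not shown on this page); a round trip might look like the sketch below, assuming the SDK's VersionedObjectMapperFactory (the test name serializeRoundTrip and the exact assertions are hypothetical illustrations):

import com.amazonaws.athena.connector.lambda.request.FederationResponse;
import com.amazonaws.athena.connector.lambda.serde.VersionedObjectMapperFactory;
import com.fasterxml.jackson.databind.ObjectMapper;

@Test
public void serializeRoundTrip() throws Exception
{
    // Hypothetical sketch: serialize the expected response, compare against the
    // golden file text, then deserialize and spot-check the row count.
    ObjectMapper mapper = VersionedObjectMapperFactory.create(allocator);
    String actualSerDeText = mapper.writeValueAsString(expected);
    assertEquals(expectedSerDeText, actualSerDeText);
    ReadRecordsResponse roundTripped =
            (ReadRecordsResponse) mapper.readValue(actualSerDeText, FederationResponse.class);
    assertEquals(expected.getRecordCount(), roundTripped.getRecordCount());
}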