use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class AbstractTableProviderTest method readTableTest.
@Test
public void readTableTest() {
GetTableRequest request = new GetTableRequest(identity, expectedQuery, expectedCatalog, expectedTableName);
GetTableResponse response = provider.getTable(allocator, request);
assertTrue(response.getSchema().getFields().size() > 1);
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put(idField, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(idValue).build());
Constraints constraints = new Constraints(constraintsMap);
ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, response.getSchema(), constraints);
S3SpillLocation spillLocation = S3SpillLocation.newBuilder().withBucket("bucket").withPrefix("prefix").withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
ReadRecordsRequest readRequest = new ReadRecordsRequest(identity, expectedCatalog, "queryId", expectedTableName, response.getSchema(), Split.newBuilder(spillLocation, keyFactory.create()).build(), constraints, 100_000_000, 100_000_000);
SpillConfig spillConfig = SpillConfig.newBuilder().withSpillLocation(spillLocation).withMaxBlockBytes(3_000_000).withMaxInlineBlockBytes(0).withRequestId("queryid").withEncryptionKey(keyFactory.create()).build();
setUpRead();
BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, response.getSchema(), evaluator);
provider.readWithConstraint(spiller, readRequest, queryStatusChecker);
validateRead(response.getSchema(), blockSpillReader, spiller.getSpillLocations(), spillConfig.getEncryptionKey());
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class AbstractTableProviderTest method validateRead.
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
int blockNum = 0;
int rowNum = 0;
for (SpillLocation next : locations) {
S3SpillLocation spillLocation = (S3SpillLocation) next;
try (Block block = reader.read(spillLocation, encryptionKey, schema)) {
logger.info("validateRead: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
for (int i = 0; i < block.getRowCount(); i++) {
logger.info("validateRead: {}", BlockUtils.rowToString(block, i));
rowNum++;
validateRow(block, i);
}
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
assertEquals(getExpectedRows(), rowNum);
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class DocDBRecordHandlerTest method nestedStructTest.
@Test
public void nestedStructTest() throws Exception {
List<Document> documents = new ArrayList<>();
Document result = new Document();
documents.add(result);
Document listStruct1 = new Document();
listStruct1.put("SomeSubStruct", "someSubStruct1");
List<Document> subList = new ArrayList<>();
Document listSubStruct1 = new Document();
listSubStruct1.put("SomeSubSubStruct", "someSubSubStruct");
subList.add(listSubStruct1);
listStruct1.put("SomeSubList", subList);
Document listStruct2 = new Document();
listStruct2.put("SomeSubStruct1", "someSubStruct2");
List<Document> list = new ArrayList<>();
list.add(listStruct1);
list.add(listStruct1);
Document structWithList = new Document();
structWithList.put("SomeList", list);
Document structWithNullList = new Document();
structWithNullList.put("SomeNullList", null);
Document simpleSubStruct = new Document();
simpleSubStruct.put("SomeSimpleSubStruct", "someSimpleSubStruct");
structWithList.put("SimpleSubStruct", simpleSubStruct);
structWithList.put("SimpleSubStructNullList", structWithNullList);
result.put("ComplexStruct", structWithList);
Document simpleStruct = new Document();
simpleStruct.put("SomeSimpleStruct", "someSimpleStruct");
result.put("SimpleStruct", simpleStruct);
when(mockCollection.find()).thenReturn(mockIterable);
when(mockIterable.limit(anyInt())).thenReturn(mockIterable);
when(mockIterable.maxScan(anyInt())).thenReturn(mockIterable);
when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
GetTableRequest req = new GetTableRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME);
GetTableResponse res = mdHandler.doGetTable(allocator, req);
logger.info("doGetTable - {}", res);
when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
Map<String, ValueSet> constraintsMap = new HashMap<>();
S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), TABLE_NAME, res.getSchema(), Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
100_000_000_000L, 100_000_000_000L);
RecordResponse rawResponse = handler.doReadRecords(allocator, request);
assertTrue(rawResponse instanceof ReadRecordsResponse);
ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
assertTrue(response.getRecordCount() == 1);
String expectedString = "[ComplexStruct : {[SomeList : {{[SomeSubStruct : someSubStruct1]," + "[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}," + "{[SomeSubStruct : someSubStruct1],[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}}]," + "[SimpleSubStruct : {[SomeSimpleSubStruct : someSimpleSubStruct]}]," + "[SimpleSubStructNullList : {[SomeNullList : null]}]}], [SimpleStruct : {[SomeSimpleStruct : someSimpleStruct]}]";
assertEquals(expectedString, BlockUtils.rowToString(response.getRecords(), 0));
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class DocDBRecordHandlerTest method doReadRecordsNoSpill.
@Test
public void doReadRecordsNoSpill() throws Exception {
List<Document> documents = new ArrayList<>();
int docNum = 11;
Document doc1 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
documents.add(doc1);
doc1.put("col3", 22.0D);
Document doc2 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
documents.add(doc2);
doc2.put("col3", 22.0D);
Document doc3 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
documents.add(doc3);
doc3.put("col3", 21.0D);
doc3.put("unsupported", new UnsupportedType());
when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.equal(allocator, Types.MinorType.FLOAT8.getType(), 22.0D)), false));
S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), TABLE_NAME, schemaForRead, Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
100_000_000_000L, 100_000_000_000L);
RecordResponse rawResponse = handler.doReadRecords(allocator, request);
assertTrue(rawResponse instanceof ReadRecordsResponse);
ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
assertTrue(response.getRecords().getRowCount() == 2);
logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class DocDBRecordHandlerTest method doReadRecordsSpill.
@Test
public void doReadRecordsSpill() throws Exception {
List<Document> documents = new ArrayList<>();
for (int docNum = 0; docNum < 20_000; docNum++) {
documents.add(DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum));
}
when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
return mockIterable;
});
when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), TABLE_NAME, schemaForRead, Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
1_500_000L, 0L);
RecordResponse rawResponse = handler.doReadRecords(allocator, request);
assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
assertTrue(response.getNumberBlocks() > 1);
int blockNum = 0;
for (SpillLocation next : response.getRemoteBlocks()) {
S3SpillLocation spillLocation = (S3SpillLocation) next;
try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
// assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
assertNotNull(BlockUtils.rowToString(block, 0));
}
}
}
}
Aggregations