use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class S3BlockSpillerTest method spillTest.
/**
 * Writes the same block twice through {@code blockWriter}, checks the reported spill
 * locations and the S3 calls made, then reads the second block back and compares it
 * with the block that was written.
 */
@Test
public void spillTest() throws IOException {
    logger.info("spillTest: enter");
    logger.info("spillTest: starting write test");

    // Keys the two consecutive writes are expected to use (suffix .0, then .1).
    final String firstKey = prefix + "/" + requestId + "/" + splitId + ".0";
    final String secondKey = prefix + "/" + requestId + "/" + splitId + ".1";

    // Capture the bytes handed to putObject so the read phase can serve them back.
    // Every put overwrites the holder, so it ends up with the bytes of the last write.
    final ByteHolder byteHolder = new ByteHolder();
    when(mockS3.putObject(eq(bucket), anyString(), anyObject(), anyObject())).thenAnswer(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
            byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
            return mock(PutObjectResult.class);
        }
    });

    // Cast unconditionally: the read phase below needs an S3SpillLocation anyway, and a
    // ClassCastException here fails the test instead of silently skipping the assertions.
    S3SpillLocation firstLocation = (S3SpillLocation) blockWriter.write(expected);
    assertEquals(bucket, firstLocation.getBucket());
    assertEquals(firstKey, firstLocation.getKey());

    S3SpillLocation secondLocation = (S3SpillLocation) blockWriter.write(expected);
    assertEquals(bucket, secondLocation.getBucket());
    assertEquals(secondKey, secondLocation.getKey());

    // Exactly one put per key and nothing else on the S3 client.
    verify(mockS3, times(1)).putObject(eq(bucket), eq(firstKey), anyObject(), anyObject());
    verify(mockS3, times(1)).putObject(eq(bucket), eq(secondKey), anyObject(), anyObject());
    verifyNoMoreInteractions(mockS3);
    reset(mockS3);

    logger.info("spillTest: Starting read test.");
    // Serve the captured bytes when the second key is fetched.
    when(mockS3.getObject(eq(bucket), eq(secondKey))).thenAnswer(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            S3Object mockObject = mock(S3Object.class);
            when(mockObject.getObjectContent()).thenReturn(new S3ObjectInputStream(new ByteArrayInputStream(byteHolder.getBytes()), null));
            return mockObject;
        }
    });

    Block block = blockWriter.read(secondLocation, spillConfig.getEncryptionKey(), expected.getSchema());
    assertEquals(expected, block);

    // The read must hit S3 exactly once, for the second key only.
    verify(mockS3, times(1)).getObject(eq(bucket), eq(secondKey));
    verifyNoMoreInteractions(mockS3);
    logger.info("spillTest: exit");
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class HbaseRecordHandlerTest method doReadRecordsNoSpill.
/**
 * Feeds 100 generated results through a mocked scanner, reads them via the handler with
 * an equality constraint on {@code family1:col3}, and expects an inline (non-remote)
 * response containing exactly one row.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    // Mock scanner that iterates over the generated results.
    List<Result> scanResults = TestUtils.makeResults(100);
    ResultScanner scanner = mock(ResultScanner.class);
    when(scanner.iterator()).thenReturn(scanResults.iterator());

    // Route scanTable to the ResultProcessor passed as its third argument.
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocation) -> {
        ResultProcessor resultProcessor = (ResultProcessor) invocation.getArguments()[2];
        return resultProcessor.scan(scanner);
    });

    // Constraint: family1:col3 == 1 (BIGINT).
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "family1:col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.BIGINT.getType(),
                    ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 1L)),
                    false));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    // Both size limits are set to 100GB so this read is not expected to spill.
    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertTrue(1 == response.getRecords().getRowCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class JdbcRecordHandlerTest method readWithConstraint.
/**
 * Runs {@code jdbcRecordHandler.readWithConstraint} against a mocked two-row result set
 * with a spiller backed by a mocked S3 client, and checks that anything written to S3
 * contains at least one of the expected column or partition values.
 */
@Test
public void readWithConstraint() throws SQLException {
    // Constraint evaluator that accepts every value.
    ConstraintEvaluator constraintEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(constraintEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);

    TableName inputTableName = new TableName("testSchema", "testTable");

    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = expectedSchemaBuilder.build();

    S3SpillLocation s3SpillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(s3SpillLocation, null).add("testPartitionCol", "testPartitionValue");
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);

    // Two rows over (testCol1 INTEGER, testCol2 VARCHAR) returned by the prepared statement.
    String[] schema = { "testCol1", "testCol2" };
    int[] columnTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] values = { { 1, "testVal1" }, { 2, "testVal2" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, columnTypes, values, rowNumber);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);

    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(s3SpillLocation);

    // The allocator is created by this test, so release it when the test ends
    // (previously it was never closed).
    try (BlockAllocator allocator = new BlockAllocatorImpl()) {
        BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, constraintEvaluator);
        ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId", inputTableName, fieldSchema, splitBuilder.build(), constraints, 1024, 1024);

        // Inspect anything put to S3: the payload must mention one of the expected values.
        Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())).thenAnswer((Answer<PutObjectResult>) invocation -> {
            ByteArrayInputStream byteArrayInputStream = (ByteArrayInputStream) invocation.getArguments()[2];
            // available() on a ByteArrayInputStream is the exact number of remaining bytes,
            // so a single read drains the stream.
            int n = byteArrayInputStream.available();
            byte[] bytes = new byte[n];
            byteArrayInputStream.read(bytes, 0, n);
            String data = new String(bytes, StandardCharsets.UTF_8);
            Assert.assertTrue(data.contains("testVal1") || data.contains("testVal2") || data.contains("testPartitionValue"));
            return new PutObjectResult();
        });

        this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
    }
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class NeptuneRecordHandlerTest method doReadRecordsSpill.
/**
 * Reads vertices with a "property1 > 9" constraint using small size limits, expects a
 * remote (spilled) response with exactly one block, and reads each spilled block back
 * through {@code spillReader}.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    allocator = new BlockAllocatorImpl();

    // Greater Than filter
    HashMap<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));

    buildGraphTraversal();

    // Size limits of ~1.5MB and 0 bytes, so we should see some spill.
    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            QUERY_ID,
            TABLE_NAME,
            schemaPGVertexForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).build(),
            new Constraints(constraintsMap),
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(1 == response.getNumberBlocks());

        int blockNum = 0;
        for (SpillLocation remoteBlock : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) remoteBlock;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;

                // Render the first row once; it is both logged and checked for non-null.
                String firstRow = BlockUtils.rowToString(block, 0);
                logger.info("doReadRecordsSpill: {}", firstRow);
                assertNotNull(firstRow);
            }
        }
    }
}
use of com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation in project aws-athena-query-federation by awslabs.
the class NeptuneRecordHandlerTest method invokeAndAssert.
/**
 * Used to invoke each test condition and assert
 *
 * Builds a read request for the given schema and constraints, runs it through the
 * handler, and asserts that the response is an inline {@code ReadRecordsResponse}
 * whose row count equals the expected count.
 *
 * @param schemaPG Schema passed to the read request
 * @param constraintMap Constraint Map for Gremlin Query
 * @param expectedRecordCount Expected Row Count as per Gremlin Query Response
 */
private void invokeAndAssert(Schema schemaPG, HashMap<String, ValueSet> constraintMap, Integer expectedRecordCount) throws Exception {
    S3SpillLocation spillLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    allocator = new BlockAllocatorImpl();
    buildGraphTraversal();

    // Both size limits are 100GB; the assertions below expect an inline (non-remote) response.
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, QUERY_ID, TABLE_NAME, schemaPG, Split.newBuilder(spillLoc, null).build(), new Constraints(constraintMap), 100_000_000_000L, 100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    // expectedRecordCount is unboxed for the comparison with the int row count.
    assertTrue(response.getRecords().getRowCount() == expectedRecordCount);
    // Log under this helper's own name; the previous tag pointed at a different test method.
    logger.info("invokeAndAssert: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Aggregations