Use of com.amazonaws.athena.connector.lambda.data.S3BlockSpiller in project aws-athena-query-federation by awslabs.
From the class JdbcRecordHandlerTest, method readWithConstraint:
@Test
public void readWithConstraint() throws SQLException {
    ConstraintEvaluator constraintEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(constraintEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);

    TableName inputTableName = new TableName("testSchema", "testTable");

    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema fieldSchema = expectedSchemaBuilder.build();

    BlockAllocator allocator = new BlockAllocatorImpl();
    S3SpillLocation s3SpillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(s3SpillLocation, null).add("testPartitionCol", "testPartitionValue");
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);

    // Mock a two-row JDBC result set for the record handler to read from.
    String[] schema = {"testCol1", "testCol2"};
    int[] columnTypes = {Types.INTEGER, Types.VARCHAR};
    Object[][] values = {{1, "testVal1"}, {2, "testVal2"}};
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, columnTypes, values, rowNumber);
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(resultSet);

    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(s3SpillLocation);
    BlockSpiller s3Spiller = new S3BlockSpiller(this.amazonS3, spillConfig, allocator, fieldSchema, constraintEvaluator);
    ReadRecordsRequest readRecordsRequest = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId",
            inputTableName, fieldSchema, splitBuilder.build(), constraints, 1024, 1024);

    // Intercept the spill to S3 and assert that the serialized block carries the expected values.
    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any()))
            .thenAnswer((Answer<PutObjectResult>) invocation -> {
                ByteArrayInputStream byteArrayInputStream = (ByteArrayInputStream) invocation.getArguments()[2];
                int n = byteArrayInputStream.available();
                byte[] bytes = new byte[n];
                byteArrayInputStream.read(bytes, 0, n);
                String data = new String(bytes, StandardCharsets.UTF_8);
                Assert.assertTrue(data.contains("testVal1") || data.contains("testVal2") || data.contains("testPartitionValue"));
                return new PutObjectResult();
            });

    this.jdbcRecordHandler.readWithConstraint(s3Spiller, readRecordsRequest, queryStatusChecker);
}
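The mockResultSet helper called above is inherited from the test's shared base class and is not shown on this page. A minimal sketch of what such a helper can look like, assuming Mockito and the shared rowNumber cursor (the body below is inferred for illustration, not the project's exact code):

private ResultSet mockResultSet(String[] columnNames, int[] columnTypes, Object[][] rows, AtomicInteger rowNumber) throws SQLException {
    ResultSet resultSet = Mockito.mock(ResultSet.class);
    // next() advances the shared cursor and reports whether another row remains.
    Mockito.when(resultSet.next()).thenAnswer(invocation -> rowNumber.incrementAndGet() < rows.length);
    for (int i = 0; i < columnNames.length; i++) {
        final int column = i;
        if (columnTypes[column] == Types.INTEGER) {
            // Serve the current row's value whenever the handler reads this column.
            Mockito.when(resultSet.getInt(columnNames[column])).thenAnswer(invocation -> (Integer) rows[rowNumber.get()][column]);
        }
        else {
            Mockito.when(resultSet.getString(columnNames[column])).thenAnswer(invocation -> (String) rows[rowNumber.get()][column]);
        }
    }
    return resultSet;
}

Sharing the AtomicInteger lets the test observe how far the handler has iterated, which is why it is passed in rather than created inside the helper.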
Use of com.amazonaws.athena.connector.lambda.data.S3BlockSpiller in project aws-athena-query-federation by awslabs.
From the class AbstractTableProviderTest, method readTableTest:
@Test
public void readTableTest() {
    GetTableRequest request = new GetTableRequest(identity, expectedQuery, expectedCatalog, expectedTableName);
    GetTableResponse response = provider.getTable(allocator, request);
    assertTrue(response.getSchema().getFields().size() > 1);

    // Constrain the read to rows whose id column equals the expected value.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(idField, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(idValue).build());
    Constraints constraints = new Constraints(constraintsMap);
    ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, response.getSchema(), constraints);

    S3SpillLocation spillLocation = S3SpillLocation.newBuilder().withBucket("bucket").withPrefix("prefix")
            .withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true).build();

    ReadRecordsRequest readRequest = new ReadRecordsRequest(identity, expectedCatalog, "queryId", expectedTableName,
            response.getSchema(), Split.newBuilder(spillLocation, keyFactory.create()).build(), constraints, 100_000_000, 100_000_000);

    // A zero inline-block limit forces every block to spill to S3.
    SpillConfig spillConfig = SpillConfig.newBuilder().withSpillLocation(spillLocation)
            .withMaxBlockBytes(3_000_000).withMaxInlineBlockBytes(0)
            .withRequestId("queryid").withEncryptionKey(keyFactory.create()).build();

    setUpRead();
    BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, response.getSchema(), evaluator);
    provider.readWithConstraint(spiller, readRequest, queryStatusChecker);
    validateRead(response.getSchema(), blockSpillReader, spiller.getSpillLocations(), spillConfig.getEncryptionKey());
}
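validateRead is likewise a hook on the abstract test class whose body this page does not show. Conceptually it reads every spilled block back and checks that the constrained rows survived the round trip; the sketch below is an assumption built on the SDK's S3BlockSpillReader, not the project's actual implementation:

private void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
    long rows = 0;
    for (SpillLocation location : locations) {
        // Each spilled block is fetched from S3, decrypted, and deserialized back into Arrow form.
        try (Block block = reader.read((S3SpillLocation) location, encryptionKey, schema)) {
            rows += block.getRowCount();
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    // At minimum, the row matching the id constraint should have been written.
    assertTrue(rows > 0);
}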
Use of com.amazonaws.athena.connector.lambda.data.S3BlockSpiller in project aws-athena-query-federation by awslabs.
From the class RecordHandler, method doReadRecords:
/**
* Used to read the row data associated with the provided Split.
*
* @param allocator Tool for creating and managing Apache Arrow Blocks.
* @param request Details of the read request, including:
* 1. The Split
* 2. The Catalog, Database, and Table the read request is for.
* 3. The filtering predicate (if any)
* 4. The columns required for projection.
* @return A RecordResponse which is either a ReadRecordsResponse or a RemoteReadRecordsResponse containing the row
* data for the requested Split.
*/
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    logger.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    SpillConfig spillConfig = getSpillConfig(request);
    try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, request.getSchema(), request.getConstraints());
            S3BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, request.getSchema(), evaluator);
            QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId())) {
        readWithConstraint(spiller, request, queryStatusChecker);
        if (!spiller.spilled()) {
            // Everything fit in a single block, so return the rows inline with the response.
            return new ReadRecordsResponse(request.getCatalogName(), spiller.getBlock());
        }
        else {
            // The results were spilled to S3; return the spill locations and the key needed to decrypt them.
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), spiller.getSpillLocations(), spillConfig.getEncryptionKey());
        }
    }
}
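doReadRecords delegates the actual row production to readWithConstraint, the abstract hook each connector implements against its own data source. As a rough illustration of that contract, an implementation pushes rows into the spiller one at a time; rowsFromMySource below is a hypothetical stand-in for a real scan, not part of the SDK:

@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker) {
    // rowsFromMySource is a hypothetical stand-in for the connector's real data source.
    for (Map<String, Object> row : rowsFromMySource(request.getTableName())) {
        if (!queryStatusChecker.isQueryRunning()) {
            return;  // Athena has abandoned the query; stop producing rows.
        }
        spiller.writeRows((Block block, int rowNum) -> {
            boolean matched = true;
            for (Field field : request.getSchema().getFields()) {
                // offerValue writes the cell and reports whether the row still satisfies the constraints.
                matched &= block.offerValue(field.getName(), rowNum, row.get(field.getName()));
            }
            return matched ? 1 : 0;  // Count the row only if it passed every constraint.
        });
    }
}

The spiller decides internally when a block is full and must be spilled to S3, which is what keeps the inline-versus-remote branch in doReadRecords transparent to the connector author.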