Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest in project aws-athena-query-federation by awslabs.
From the class TPCDSRecordHandlerTest, method doReadRecordForTPCDSTIMETypeColumn:
@Test
public void doReadRecordForTPCDSTIMETypeColumn() throws Exception
{
    for (Table next : Table.getBaseTables()) {
        if (next.getName().equals("dbgen_version")) {
            table = next;
        }
    }
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    for (Column nextCol : table.getColumns()) {
        schemaBuilder.addField(TPCDSUtils.convertColumn(nextCol));
    }
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaBuilder.build(),
            Split.newBuilder(spillLocation, keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "1000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(ImmutableMap.of()),
            // 100GB; don't expect this request to spill
            100_000_000_000L,
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordForTPCDSTIMETypeColumn: {}", BlockUtils.rowToString(response.getRecords(), 0));
    // TPCDS for `dbgen_version` always generates 1 record.
    assertEquals(1, response.getRecords().getRowCount());
    logger.info("doReadRecordForTPCDSTIMETypeColumn: exit");
}
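The two trailing long arguments are the maximum block size and the maximum inline block size; at 100GB apiece the response is effectively guaranteed to come back inline as a ReadRecordsResponse rather than spilling. Below is a minimal sketch of handling both possible response shapes, assuming the SDK's S3BlockSpillReader (the type behind the spillReader used in the Elasticsearch test further down); countRows is a hypothetical helper, not part of the SDK:

import com.amazonaws.athena.connector.lambda.data.Block;
import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
import com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse;
import com.amazonaws.athena.connector.lambda.records.RecordResponse;
import com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse;

// Hypothetical helper: counts rows whether the handler answered inline or spilled to S3.
static long countRows(RecordResponse raw, S3BlockSpillReader spillReader) throws Exception
{
    if (raw instanceof ReadRecordsResponse) {
        // Everything fit under maxInlineBlockSize, so the records came back inline.
        return ((ReadRecordsResponse) raw).getRecords().getRowCount();
    }
    // Otherwise the handler spilled; each remote block must be fetched, decrypted, and decoded.
    RemoteReadRecordsResponse remote = (RemoteReadRecordsResponse) raw;
    long rows = 0;
    for (SpillLocation next : remote.getRemoteBlocks()) {
        try (Block block = spillReader.read((S3SpillLocation) next, remote.getEncryptionKey(), remote.getSchema())) {
            rows += block.getRowCount();
        }
    }
    return rows;
}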
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest in project aws-athena-query-federation by awslabs.
From the class ExampleRecordHandlerTest, method doReadRecordsNoSpill:
@Test
public void doReadRecordsNoSpill() throws Exception
{
    if (!enableTests) {
        // We do this because these tests will fail until you complete the tutorial. When you attempt to publish
        // using ../tools/publish.sh ... it will set the publishing flag and force these tests to run. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("doReadRecordsNoSpill: Tests are disabled, to enable them set the 'publishing' environment variable " +
                "using maven clean install -Dpublishing=true");
        return;
    }
    for (int i = 0; i < 2; i++) {
        Map<String, ValueSet> constraintsMap = new HashMap<>();
        ReadRecordsRequest request = new ReadRecordsRequest(fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), null)
                        .add("year", "2017")
                        .add("month", "11")
                        .add("day", "1")
                        .build(),
                new Constraints(constraintsMap),
                // 100GB; don't expect this request to spill
                100_000_000_000L,
                100_000_000_000L);
        RecordResponse rawResponse = handler.doReadRecords(allocator, request);
        assertTrue(rawResponse instanceof ReadRecordsResponse);
        ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
        logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
        assertTrue(response.getRecords().getRowCount() > 0);
        logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
    }
}
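The empty constraintsMap above means no predicate pushdown is exercised. A hedged sketch of adding one, modeled directly on the SortedRangeSet usage in the Elasticsearch test below; the "year" column bound and the INT type are illustrative assumptions about schemaForRead:

// Sketch: restrict a hypothetical "year" column to the range (2016, 2017], mirroring the
// SortedRangeSet constraint built in the Elasticsearch test below. allocator, fakeIdentity(),
// makeSpillLocation(), and schemaForRead are the test fixtures used above.
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("year", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
        ImmutableList.of(Range.range(allocator, Types.MinorType.INT.getType(), 2016, false, 2017, true)),
        false));
ReadRecordsRequest constrained = new ReadRecordsRequest(fakeIdentity(),
        "catalog",
        "queryId-" + System.currentTimeMillis(),
        new TableName("schema", "table"),
        schemaForRead,
        Split.newBuilder(makeSpillLocation(), null).add("year", "2017").build(),
        new Constraints(constraintsMap),
        100_000_000_000L,
        100_000_000_000L);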
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest in project aws-athena-query-federation by awslabs.
From the class LambdaRecordProvider, method readRecords:
/**
 * This method builds and executes a ReadRecordsRequest against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table whose records should be read
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param split the split to be read in this request
 * @param recordFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the ReadRecordsResponse returned by the Lambda function
 */
public static ReadRecordsResponse readRecords(String catalog, TableName tableName, Constraints constraints,
        Schema schema, Split split, String recordFunction, FederatedIdentity identity)
{
    String queryId = generateQueryId();
    log.info("Submitting ReadRecordsRequest with ID {}", queryId);
    // The ReadRecordsRequest constructor takes (identity, catalogName, queryId, ...),
    // matching the order used in the test snippets above.
    try (ReadRecordsRequest request = new ReadRecordsRequest(identity, catalog, queryId, tableName,
            schema, split, constraints, MAX_BLOCK_SIZE, MAX_INLINE_BLOCK_SIZE)) {
        log.info("Submitting request: {}", request);
        ReadRecordsResponse response = (ReadRecordsResponse) getService(recordFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}
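A hedged usage sketch of the method; the catalog, table, and function names are placeholders, and in practice schema and split would come from the corresponding metadata calls against the same Lambda function:

// Hypothetical invocation; "lambda", "sales"/"orders", and "my-record-function" are placeholders,
// and schema/split are assumed to come from earlier metadata (table/splits) calls.
ReadRecordsResponse response = LambdaRecordProvider.readRecords("lambda",
        new TableName("sales", "orders"),
        new Constraints(Collections.emptyMap()),
        schema,
        split,
        "my-record-function",
        identity);
log.info("Read {} rows", response.getRecords().getRowCount());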
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest in project aws-athena-query-federation by awslabs.
From the class ElasticsearchRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception
{
    logger.info("doReadRecordsSpill: enter");
    int batchSize = handler.getQueryBatchSize();
    SearchHit[] searchHit1 = new SearchHit[batchSize];
    for (int i = 0; i < batchSize; ++i) {
        searchHit1[i] = new SearchHit(i + 1);
    }
    SearchHit[] searchHit2 = new SearchHit[2];
    searchHit2[0] = new SearchHit(batchSize + 1);
    searchHit2[1] = new SearchHit(batchSize + 2);
    SearchHits searchHits1 = new SearchHits(searchHit1, new TotalHits(batchSize, TotalHits.Relation.EQUAL_TO), 4);
    SearchHits searchHits2 = new SearchHits(searchHit2, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 4);
    when(mockResponse.getHits()).thenReturn(searchHits1, searchHits1, searchHits2, searchHits2);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("myshort", SortedRangeSet.copyOf(Types.MinorType.SMALLINT.getType(),
            ImmutableList.of(Range.range(allocator, Types.MinorType.SMALLINT.getType(),
                    (short) 1955, false, (short) 1972, true)), false));
    ReadRecordsRequest request = new ReadRecordsRequest(fakeIdentity(),
            "elasticsearch",
            "queryId-" + System.currentTimeMillis(),
            new TableName("movies", "mishmash"),
            mapping,
            split,
            new Constraints(constraintsMap),
            // 10KB max block size with a 0-byte inline threshold; expect this request to spill
            10_000L,
            0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertEquals(3, response.getNumberBlocks());
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
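For reference, a sketch of how the test's spillReader could be wired up. This assumes the SDK's S3BlockSpillReader takes an S3 client and a BlockAllocator, and that the test stubs the mocked client so spilled blocks round-trip through memory; treat both as assumptions rather than the test's exact setup:

import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
import com.amazonaws.services.s3.AmazonS3;
import org.mockito.Mockito;

// Sketch: pair an S3 client with a BlockAllocator so spilled Arrow blocks can be fetched,
// decrypted with the response's EncryptionKey, and deserialized. The real test presumably
// stubs putObject/getObject on the mock against an in-memory map so reads see prior spills.
AmazonS3 amazonS3 = Mockito.mock(AmazonS3.class);
BlockAllocator spillAllocator = new BlockAllocatorImpl();
S3BlockSpillReader spillReader = new S3BlockSpillReader(amazonS3, spillAllocator);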
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest in project aws-athena-query-federation by awslabs.
From the class HiveMuxRecordHandlerTest, method readWithConstraint:
@Test
public void readWithConstraint() throws SQLException
{
    BlockSpiller blockSpiller = Mockito.mock(BlockSpiller.class);
    ReadRecordsRequest readRecordsRequest = Mockito.mock(ReadRecordsRequest.class);
    Mockito.when(readRecordsRequest.getCatalogName()).thenReturn("recordHive");
    this.jdbcRecordHandler.readWithConstraint(blockSpiller, readRecordsRequest, queryStatusChecker);
    Mockito.verify(this.hiveRecordHandler, Mockito.times(1))
            .readWithConstraint(Mockito.eq(blockSpiller), Mockito.eq(readRecordsRequest), Mockito.eq(queryStatusChecker));
}
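The mux handler's contract is just catalog-based dispatch: look up the delegate registered for the request's catalog name and forward the call, which is exactly what the Mockito.verify above asserts. A schematic sketch of that dispatch follows, assuming a catalog-to-handler map rather than the SDK's actual MultiplexingJdbcRecordHandler internals (the JdbcRecordHandler import is omitted since its package differs across connector versions):

import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
import java.util.Map;

// Schematic mux dispatch; illustrative only, not the SDK's implementation.
public class MuxRecordHandlerSketch
{
    // e.g. {"recordHive" -> hiveRecordHandler}; populated at construction time.
    private final Map<String, JdbcRecordHandler> delegates;

    public MuxRecordHandlerSketch(Map<String, JdbcRecordHandler> delegates)
    {
        this.delegates = delegates;
    }

    public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker)
            throws Exception
    {
        JdbcRecordHandler delegate = delegates.get(request.getCatalogName());
        if (delegate == null) {
            throw new RuntimeException("No record handler registered for catalog " + request.getCatalogName());
        }
        delegate.readWithConstraint(spiller, request, queryStatusChecker);
    }
}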