Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
The class RedisRecordHandlerTest, method doReadRecordsHash.
@Test
public void doReadRecordsHash() throws Exception {
    // 7 keys in total, served across two scan pages (5 on the first page, 2 on the second)
    when(mockSyncCommands.scan(any(ScanCursor.class), any(ScanArgs.class))).then((InvocationOnMock invocationOnMock) -> {
        ScanCursor cursor = (ScanCursor) invocationOnMock.getArguments()[0];
        if (cursor == null || cursor.getCursor().equals("0")) {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("1");
            scanCursor.setKeys(result);
            return scanCursor;
        }
        else {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("0");
            scanCursor.setKeys(result);
            scanCursor.setFinished(true);
            return scanCursor;
        }
    });

    // 3 columns per key
    AtomicLong intColVal = new AtomicLong(0);
    when(mockSyncCommands.hgetall(anyString())).then((InvocationOnMock invocationOnMock) -> {
        Map<String, String> result = new HashMap<>();
        result.put("intcol", String.valueOf(intColVal.getAndIncrement()));
        result.put("stringcol", UUID.randomUUID().toString());
        result.put("extracol", UUID.randomUUID().toString());
        return result;
    });

    AtomicLong value = new AtomicLong(0);
    when(mockSyncCommands.get(anyString())).thenAnswer((InvocationOnMock invocationOnMock) -> String.valueOf(value.getAndIncrement()));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split split = Split.newBuilder(splitLoc, keyFactory.create())
            .add(REDIS_ENDPOINT_PROP, endpoint)
            .add(KEY_TYPE, KeyType.PREFIX.getId())
            .add(KEY_PREFIX_TABLE_PROP, "key-*")
            .add(VALUE_TYPE_TABLE_PROP, ValueType.HASH.getId())
            .build();

    Schema schemaForRead = SchemaBuilder.newBuilder()
            .addField("_key_", Types.MinorType.VARCHAR.getType())
            .addField("intcol", Types.MinorType.INT.getType())
            .addField("stringcol", Types.MinorType.VARCHAR.getType())
            .build();

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("intcol", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 1)), false));

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            schemaForRead,
            split,
            new Constraints(constraintsMap),
            // 100GB, so we don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsHash: rows[{}]", response.getRecordCount());
    logger.info("doReadRecordsHash: {}", BlockUtils.rowToString(response.getRecords(), 0));

    // 7 keys yield intcol values 0..6; the intcol > 1 constraint leaves 5 rows
    assertTrue(response.getRecords().getRowCount() == 5);
    assertTrue(response.getRecords().getFields().size() == schemaForRead.getFields().size());

    FieldReader keyReader = response.getRecords().getFieldReader(KEY_COLUMN_NAME);
    keyReader.setPosition(0);
    assertNotNull(keyReader.readText());

    FieldReader intCol = response.getRecords().getFieldReader("intcol");
    intCol.setPosition(0);
    assertNotNull(intCol.readInteger());

    FieldReader stringCol = response.getRecords().getFieldReader("stringcol");
    stringCol.setPosition(0);
    assertNotNull(stringCol.readText());
}
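The MockKeyScanCursor used above is a test-local helper, not part of Lettuce itself. Below is a minimal sketch of one plausible shape for it, assuming Lettuce's io.lettuce.core.KeyScanCursor, whose ScanCursor base class already exposes public setCursor(String) and setFinished(boolean); the project's real helper may differ.

import io.lettuce.core.KeyScanCursor;
import java.util.List;

// Hypothetical sketch of the MockKeyScanCursor helper referenced in the test
// above. setCursor and setFinished are inherited from ScanCursor; only key
// injection needs to be added.
public class MockKeyScanCursor<K> extends KeyScanCursor<K> {
    private List<K> keys;

    public void setKeys(List<K> keys) {
        this.keys = keys;
    }

    @Override
    public List<K> getKeys() {
        return keys;
    }
}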
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
The class TimestreamRecordHandlerTest, method readRecordsView.
@Test
public void readRecordsView() throws Exception {
    logger.info("readRecordsView - enter");

    Schema schemaForReadView = SchemaBuilder.newBuilder()
            .addField("measure_name", Types.MinorType.VARCHAR.getType())
            .addField("az", Types.MinorType.VARCHAR.getType())
            .addField("value", Types.MinorType.FLOAT8.getType())
            .addField("num_samples", Types.MinorType.BIGINT.getType())
            .addMetadata(VIEW_METADATA_FIELD, "select measure_name, az,sum(\"measure_value::double\") as value, count(*) as num_samples from \"" + DEFAULT_SCHEMA + "\".\"" + TEST_TABLE + "\" group by measure_name, az")
            .build();

    String expectedQuery = "WITH t1 AS ( select measure_name, az,sum(\"measure_value::double\") as value, count(*) as num_samples from \"my_schema\".\"my_table\" group by measure_name, az ) SELECT measure_name, az, value, num_samples FROM t1 WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";

    QueryResult mockResult = makeMockQueryResult(schemaForReadView, 1_000);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(splitLoc, null).build();

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true)
            .add("us-east-1a")
            .add("us-east-1b")
            .build());

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            "default",
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_VIEW),
            schemaForReadView,
            split,
            new Constraints(constraintsMap),
            // 100GB, so we don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readRecordsView: rows[{}]", response.getRecordCount());
    for (int i = 0; i < response.getRecordCount() && i < 10; i++) {
        logger.info("readRecordsView: {}", BlockUtils.rowToString(response.getRecords(), i));
    }
    logger.info("readRecordsView - exit");
}
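The interesting part of this test is the expectedQuery assertion: the view's SQL, stored on the schema under VIEW_METADATA_FIELD, gets wrapped in a WITH clause, and the pushed-down az constraint becomes an IN predicate. A rough sketch of that composition follows; the handler's real predicate builder is assumed, not reproduced here.

// Rough sketch of the query shape asserted above; Arrow's Schema exposes
// the stored view SQL through getCustomMetadata().
String viewSql = schemaForReadView.getCustomMetadata().get(VIEW_METADATA_FIELD);
String predicate = "(\"az\" IN ('us-east-1a','us-east-1b'))";
String query = "WITH t1 AS ( " + viewSql + " ) "
        + "SELECT measure_name, az, value, num_samples FROM t1 WHERE " + predicate;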
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
The class TPCDSRecordHandlerTest, method doReadRecordForTPCDSTIMETypeColumn.
@Test
public void doReadRecordForTPCDSTIMETypeColumn() throws Exception {
    for (Table next : Table.getBaseTables()) {
        if (next.getName().equals("dbgen_version")) {
            table = next;
        }
    }

    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    for (Column nextCol : table.getColumns()) {
        schemaBuilder.addField(TPCDSUtils.convertColumn(nextCol));
    }

    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaBuilder.build(),
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "1000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(ImmutableMap.of()),
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordForTPCDSTIMETypeColumn: {}", BlockUtils.rowToString(response.getRecords(), 0));
    // TPCDS for `dbgen_version` always generates 1 record.
    assertEquals(1, response.getRecords().getRowCount());
    logger.info("doReadRecordForTPCDSTIMETypeColumn: exit");
}
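The split carries the TPC-DS parallelism settings as string properties. A hedged sketch of how a record handler might read them back on the other side; Split.getProperty is SDK API, while the parsing shown here is illustrative.

// Illustrative read-back of the split properties set above.
int splitNumber = Integer.parseInt(split.getProperty(SPLIT_NUMBER_FIELD)); // 0
int totalSplits = Integer.parseInt(split.getProperty(SPLIT_TOTAL_NUMBER_FIELD)); // 1000
int scaleFactor = Integer.parseInt(split.getProperty(SPLIT_SCALE_FACTOR_FIELD)); // 1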
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
The class ExampleRecordHandlerTest, method doReadRecordsNoSpill.
@Test
public void doReadRecordsNoSpill() throws Exception {
    if (!enableTests) {
        // We do this because until you complete the tutorial these tests will fail. When you attempt to publish
        // using ../tools/publish.sh ... it will set the publishing flag and force these tests. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("doReadRecordsNoSpill: Tests are disabled, to enable them set the 'publishing' environment variable using maven clean install -Dpublishing=true");
        return;
    }

    for (int i = 0; i < 2; i++) {
        Map<String, ValueSet> constraintsMap = new HashMap<>();
        ReadRecordsRequest request = new ReadRecordsRequest(fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), null)
                        .add("year", "2017")
                        .add("month", "11")
                        .add("day", "1")
                        .build(),
                new Constraints(constraintsMap),
                // 100GB, so we don't expect this to spill
                100_000_000_000L,
                100_000_000_000L);

        RecordResponse rawResponse = handler.doReadRecords(allocator, request);
        assertTrue(rawResponse instanceof ReadRecordsResponse);

        ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
        logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
        assertTrue(response.getRecords().getRowCount() > 0);
        logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
    }
}
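This test pins both size limits at 100GB so the response stays inline. A companion spill test would drive the same handler with small limits and expect the SDK's spilled-response type, RemoteReadRecordsResponse, instead; the sketch below is hedged, with illustrative thresholds rather than the project's actual values.

// Illustrative spill-side counterpart: small block/inline limits force the
// handler to spill to S3 and return a RemoteReadRecordsResponse instead.
ReadRecordsRequest spillRequest = new ReadRecordsRequest(fakeIdentity(),
        "catalog",
        "queryId-" + System.currentTimeMillis(),
        new TableName("schema", "table"),
        schemaForRead,
        Split.newBuilder(makeSpillLocation(), null).add("year", "2017").add("month", "11").add("day", "1").build(),
        new Constraints(new HashMap<>()),
        1_600_000L, // max block size: ~1.6MB
        1_000L); // max inline response size: 1KB, so rows spill
RecordResponse rawSpillResponse = handler.doReadRecords(allocator, spillRequest);
assertTrue(rawSpillResponse instanceof RemoteReadRecordsResponse);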
Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.
The class LambdaRecordProvider, method readRecords.
/**
 * This method builds and executes a ReadRecordsRequest against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified name of the table to be read
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param split the split to be read in this request
 * @param recordFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 */
public static ReadRecordsResponse readRecords(String catalog, TableName tableName, Constraints constraints, Schema schema, Split split, String recordFunction, FederatedIdentity identity) {
    String queryId = generateQueryId();
    log.info("Submitting ReadRecordsRequest with ID {}", queryId);

    // Note: ReadRecordsRequest takes the catalog name before the query id,
    // matching the constructor usage in the tests above.
    try (ReadRecordsRequest request = new ReadRecordsRequest(identity, catalog, queryId, tableName, schema, split, constraints, MAX_BLOCK_SIZE, MAX_INLINE_BLOCK_SIZE)) {
        log.info("Submitting request: {}", request);
        ReadRecordsResponse response = (ReadRecordsResponse) getService(recordFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}
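For reference, a small usage sketch of this helper. The catalog, table, and function names are placeholders, and the schema and split values would normally come from earlier GetTableRequest / GetSplitsRequest calls against the same connector.

// Placeholder values throughout; this only illustrates the call shape.
ReadRecordsResponse response = LambdaRecordProvider.readRecords(
        "my_catalog",
        new TableName("my_schema", "my_table"),
        new Constraints(java.util.Collections.emptyMap()),
        schema, // obtained from a prior GetTableRequest
        split, // obtained from a prior GetSplitsRequest
        "my-record-function",
        identity);
System.out.println("Read " + response.getRecordCount() + " records");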