Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From the class DocDBMetadataHandlerTest, method doGetSplits:
@Test
public void doGetSplits() {
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, PARTITION_ID, Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME,
            partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());

    assertTrue("Continuation criteria violated", response.getSplits().size() == 1);
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
}
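This handler returns all of its splits in a single batch, so the continuation token comes back null and the single request above is enough. For a connector that paginates, a caller would keep re-wrapping the original request with the latest token until none is returned; a minimal sketch, assuming the same handler, allocator, and originalReq fields as the test above:

Set<Split> allSplits = new HashSet<>();
String token = null;
do {
    // Re-wrapping originalReq with the latest token requests the next batch of splits.
    GetSplitsResponse resp = (GetSplitsResponse) handler.doGetSplits(
            allocator, new GetSplitsRequest(originalReq, token));
    allSplits.addAll(resp.getSplits());
    token = resp.getContinuationToken();
} while (token != null);

The doGetMetricsSplits example later on this page exercises exactly this loop.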
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From the class DocDBRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    // Generate enough rows that the result cannot fit under the spill threshold below.
    List<Document> documents = new ArrayList<>();
    for (int docNum = 0; docNum < 20_000; docNum++) {
        documents.add(DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum));
    }

    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(), TABLE_NAME, schemaForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
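The spill branch above is forced by the deliberately small ~1.5MB threshold. When the rows fit under the threshold, doReadRecords instead returns the records inline as a single Block; a minimal sketch of that branch, assuming a request identical to the one above but with a spill threshold large enough to hold every row:

// No-spill branch: the response carries the records inline as one Block.
RecordResponse rawResponse = handler.doReadRecords(allocator, request);
assertTrue(rawResponse instanceof ReadRecordsResponse);
ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecords().getRowCount());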
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From the class CloudwatchRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("time", SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog",
            "queryId-" + System.currentTimeMillis(), new TableName("schema", "table"), schemaForRead,
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
                    .build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From the class MetricsMetadataHandlerTest, method doGetMetricsSplits:
@Test
public void doGetMetricsSplits() throws Exception {
    logger.info("doGetMetricsSplits: enter");

    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);

    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name",
            new TableName(defaultSchema, "metrics"), partitions, Collections.singletonList("partitionId"),
            new Constraints(new HashMap<>()), continuationToken);

    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetMetricsSplits: req[{}]", req);

        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetMetricsSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        assertEquals(1, response.getSplits().size());

        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);

    assertEquals(0, numContinuations);
    logger.info("doGetMetricsSplits: exit");
}
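This test builds a one-row partitions Block by hand with allocator.createBlock, BlockUtils.setValue, and setRowCount. The same primitives extend naturally to several partitions; a minimal sketch, assuming the same allocator and schema as above:

// Hypothetical multi-partition Block built with the same primitives.
Block multi = allocator.createBlock(schema);
for (int row = 0; row < 4; row++) {
    BlockUtils.setValue(multi.getFieldVector("partitionId"), row, row);
}
multi.setRowCount(4);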
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
From the class DocDBRecordHandler, method readWithConstraint:
/**
 * Scans DocumentDB using the scan settings set on the requested Split by DocDBMetadataHandler.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    TableName tableName = recordsRequest.getTableName();
    Map<String, ValueSet> constraintSummary = recordsRequest.getConstraints().getSummary();

    MongoClient client = getOrCreateConn(recordsRequest.getSplit());
    MongoDatabase db = client.getDatabase(tableName.getSchemaName());
    MongoCollection<Document> table = db.getCollection(tableName.getTableName());

    // Push the constraint summary down into the Mongo query and projection.
    Document query = QueryUtils.makeQuery(recordsRequest.getSchema(), constraintSummary);
    Document output = QueryUtils.makeProjection(recordsRequest.getSchema());
    logger.info("readWithConstraint: query[{}] projection[{}]", query, output);

    final MongoCursor<Document> iterable = table.find(query).projection(output).batchSize(MONGO_QUERY_BATCH_SIZE).iterator();
    long numRows = 0;
    AtomicLong numResultRows = new AtomicLong(0);
    while (iterable.hasNext() && queryStatusChecker.isQueryRunning()) {
        numRows++;
        spiller.writeRows((Block block, int rowNum) -> {
            Document doc = iterable.next();
            boolean matched = true;
            for (Field nextField : recordsRequest.getSchema().getFields()) {
                Object value = TypeUtils.coerce(nextField, doc.get(nextField.getName()));
                Types.MinorType fieldType = Types.getMinorTypeForArrowType(nextField.getType());
                try {
                    switch (fieldType) {
                        case LIST:
                        case STRUCT:
                            matched &= block.offerComplexValue(nextField.getName(), rowNum, DEFAULT_FIELD_RESOLVER, value);
                            break;
                        default:
                            matched &= block.offerValue(nextField.getName(), rowNum, value);
                            break;
                    }
                    if (!matched) {
                        // A field failed constraint evaluation; report zero rows written so the row is discarded.
                        return 0;
                    }
                } catch (Exception ex) {
                    throw new RuntimeException("Error while processing field " + nextField.getName(), ex);
                }
            }
            numResultRows.getAndIncrement();
            return 1;
        });
    }
    logger.info("readWithConstraint: numRows[{}] numResultRows[{}]", numRows, numResultRows.get());
}
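The lambda passed to spiller.writeRows returns the number of rows it actually wrote, so returning 0 drops a row whose values fail constraint evaluation and returning 1 keeps it. A minimal sketch of the same contract, using a hypothetical single VARCHAR field named "name":

spiller.writeRows((Block block, int rowNum) -> {
    // offerValue returns false when the value fails the block's constraint
    // evaluation for that field; report zero rows written in that case.
    boolean matched = block.offerValue("name", rowNum, "example-value");
    return matched ? 1 : 0;
});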