Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
From the class NeptuneRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    allocator = new BlockAllocatorImpl();

    // Greater-than filter
    HashMap<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));

    buildGraphTraversal();

    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, QUERY_ID, TABLE_NAME,
            schemaPGVertexForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() == 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
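The test forces the spill path by setting maxInlineBlockSize to 0L, so doReadRecords must return a RemoteReadRecordsResponse. When results fit under the inline cap, the handler returns a plain ReadRecordsResponse instead. Below is a minimal sketch of handling both cases; it assumes the handler, allocator, request, spillReader, and logger fields from the test fixture above:

    // Sketch: branch on whether the handler spilled to S3 or returned rows inline.
    RecordResponse raw = handler.doReadRecords(allocator, request);
    if (raw instanceof RemoteReadRecordsResponse) {
        // Results exceeded the inline cap and were spilled; fetch each block back from S3.
        try (RemoteReadRecordsResponse remote = (RemoteReadRecordsResponse) raw) {
            for (SpillLocation loc : remote.getRemoteBlocks()) {
                try (Block block = spillReader.read((S3SpillLocation) loc, remote.getEncryptionKey(), remote.getSchema())) {
                    logger.info("rows in spilled block: {}", block.getRowCount());
                }
            }
        }
    } else {
        // Results fit under the cap and came back inline in a single Block.
        try (ReadRecordsResponse inline = (ReadRecordsResponse) raw) {
            logger.info("rows returned inline: {}", inline.getRecordCount());
        }
    }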
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
From the class PostGreSqlMetadataHandler, method doGetSplits:
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(),
            getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());

    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    boolean splitterUsed = false;

    if (partitions.getRowCount() == 1) {
        FieldReader partitionsSchemaFieldReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
        partitionsSchemaFieldReader.setPosition(0);
        FieldReader partitionsFieldReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        partitionsFieldReader.setPosition(0);

        if (ALL_PARTITIONS.equals(partitionsSchemaFieldReader.readText().toString())
                && ALL_PARTITIONS.equals(partitionsFieldReader.readText().toString())) {
            for (String splitClause : getSplitClauses(getSplitsRequest.getTableName())) {
                // Every split must have a unique location if we wish to spill to avoid failures
                SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
                Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                        .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(partitionsSchemaFieldReader.readText()))
                        .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(splitClause));
                splits.add(splitBuilder.build());
                if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
                    throw new RuntimeException("Max splits supported with splitter " + MAX_SPLITS_PER_REQUEST);
                }
                splitterUsed = true;
            }
        }
    }

    if (!splitterUsed) {
        for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
            FieldReader partitionsSchemaFieldReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
            partitionsSchemaFieldReader.setPosition(curPartition);
            FieldReader partitionsFieldReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
            partitionsFieldReader.setPosition(curPartition);

            // Every split must have a unique location if we wish to spill to avoid failures
            SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
            LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), String.valueOf(partitionsFieldReader.readText()));
            Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(partitionsSchemaFieldReader.readText()))
                    .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(partitionsFieldReader.readText()));
            splits.add(splitBuilder.build());

            if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
                // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
                return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
            }
        }
    }

    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
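The encodeContinuationToken and decodeContinuationToken helpers are not shown above. A plausible pair is sketched below, assuming the token simply carries the next partition row index as a string, which is how the pagination loop above consumes it; the project's actual helpers may differ:

    // Hypothetical helpers (sketch): the continuation token is assumed to be the
    // next partition row index, serialized as a plain string.
    private int decodeContinuationToken(GetSplitsRequest request) {
        if (request.hasContinuationToken()) {
            return Integer.parseInt(request.getContinuationToken());
        }
        // No token on the request means we start from the first partition.
        return 0;
    }

    private String encodeContinuationToken(int partition) {
        return String.valueOf(partition);
    }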
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
From the class TPCDSRecordHandlerTest, method doReadRecordsSpill:
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_current_cdemo_sk",
            SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaForRead,
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "10000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
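For contrast, the same request with generous byte caps should avoid spilling entirely and come back as an inline ReadRecordsResponse. The sketch below assumes the same fixture (identity, table, schemaForRead, keyFactory, handler, allocator, constraintsMap); the 100_000_000L caps are arbitrary illustrative values, not taken from the project:

    // No-spill companion check (sketch): with large caps the handler returns rows inline.
    Split split = Split.newBuilder(S3SpillLocation.newBuilder()
                    .withBucket(UUID.randomUUID().toString())
                    .withSplitId(UUID.randomUUID().toString())
                    .withQueryId(UUID.randomUUID().toString())
                    .withIsDirectory(true)
                    .build(), keyFactory.create())
            .add(SPLIT_NUMBER_FIELD, "0")
            .add(SPLIT_TOTAL_NUMBER_FIELD, "10000")
            .add(SPLIT_SCALE_FACTOR_FIELD, "1")
            .build();
    ReadRecordsRequest noSpillRequest = new ReadRecordsRequest(identity, "catalog",
            "queryId-" + System.currentTimeMillis(), new TableName("tpcds1", table.getName()),
            schemaForRead, split, new Constraints(constraintsMap),
            100_000_000L,   // maxBlockSize: plenty of room
            100_000_000L);  // maxInlineBlockSize: large enough that nothing spills
    RecordResponse noSpillRaw = handler.doReadRecords(allocator, noSpillRequest);
    assertTrue(noSpillRaw instanceof ReadRecordsResponse);
    try (ReadRecordsResponse noSpill = (ReadRecordsResponse) noSpillRaw) {
        assertTrue(noSpill.getRecordCount() > 0);
    }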
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
From the class AwsCmdbMetadataHandler, method doGetSplits:
/**
 * Delegates to the TableProvider that is registered for the requested table.
 *
 * @see MetadataHandler
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    TableProvider tableProvider = tableProviders.get(getSplitsRequest.getTableName());
    if (tableProvider == null) {
        throw new RuntimeException("Unknown table " + getSplitsRequest.getTableName());
    }

    // Every split needs a unique spill location.
    SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
    EncryptionKey encryptionKey = makeEncryptionKey();
    Split split = Split.newBuilder(spillLocation, encryptionKey).build();
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), split);
}
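makeSpillLocation and makeEncryptionKey are inherited from the MetadataHandler base class and are not shown here. A sketch of what such a spill-location factory could look like, assuming configured spillBucket and spillPrefix fields (the SDK's actual implementation may differ in detail):

    // Sketch of a spill-location factory: scope the S3 key by query id plus a
    // random split id so every split spills to a unique, non-colliding prefix.
    protected SpillLocation makeSpillLocation(GetSplitsRequest request) {
        return S3SpillLocation.newBuilder()
                .withBucket(spillBucket)   // assumed: configured spill bucket
                .withPrefix(spillPrefix)   // assumed: configured key prefix
                .withQueryId(request.getQueryId())
                .withSplitId(UUID.randomUUID().toString())
                .withIsDirectory(true)
                .build();
    }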
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
From the class AbstractTableProviderTest, method validateRead:
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
    int blockNum = 0;
    int rowNum = 0;
    for (SpillLocation next : locations) {
        S3SpillLocation spillLocation = (S3SpillLocation) next;
        try (Block block = reader.read(spillLocation, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
            for (int i = 0; i < block.getRowCount(); i++) {
                logger.info("validateRead: {}", BlockUtils.rowToString(block, i));
                rowNum++;
                validateRow(block, i);
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    assertEquals(getExpectedRows(), rowNum);
}
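A caller would typically collect the spill locations from a RemoteReadRecordsResponse and hand them to this helper. A minimal usage sketch, assuming handler, allocator, request, and spillReader from the surrounding test class:

    // Sketch: feed the spilled block locations from a read response into validateRead.
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) handler.doReadRecords(allocator, request)) {
        List<SpillLocation> locations = response.getRemoteBlocks();
        validateRead(response.getSchema(), spillReader, locations, response.getEncryptionKey());
    }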