Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class HiveMetadataHandler, method doGetSplits.
/**
* Used to split up the reads required to scan the requested batch of partition(s).
*
* @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
* @param getSplitsRequest Provides details of the Hive catalog, database, table, and partition(s) being queried as well as
* any filter predicate.
* @return A GetSplitsResponse which primarily contains:
* 1. A Set of Splits which represent read operations Amazon Athena must perform by calling your read function.
* 2. (Optional) A continuation token which allows you to paginate the generation of splits for large queries.
*/
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(HiveConstants.BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= HiveConstants.MAX_SPLITS_PER_REQUEST) {
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
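The decodeContinuationToken and encodeContinuationToken helpers are not shown in this snippet. A minimal sketch of how such helpers are often written in these connectors, assuming the continuation token is simply the stringified index of the next partition to process (the bodies below are illustrative, not copied from HiveMetadataHandler):
private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume from the partition index encoded in the previous GetSplitsResponse.
        return Integer.parseInt(request.getContinuationToken());
    }
    // No token means this is the first GetSplits call for the query; start at the first partition.
    return 0;
}

private String encodeContinuationToken(int partition) {
    // The token is just the partition index, carried back to us on the next request.
    return String.valueOf(partition);
}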
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class NeptuneRecordHandlerTest, method doReadRecordsSpill.
@Test
public void doReadRecordsSpill() throws Exception {
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    allocator = new BlockAllocatorImpl();
    // Greater-than filter
    HashMap<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("property1", SortedRangeSet.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 9)));
    buildGraphTraversal();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, QUERY_ID, TABLE_NAME, schemaPGVertexForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() == 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
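The spillReader and keyFactory fields used above are initialized elsewhere in the test class. A minimal sketch of how such fields can be wired up, assuming an S3 client (mocked or real) named amazonS3 and the SDK's LocalKeyFactory; the field names and setup are assumptions, not copied from NeptuneRecordHandlerTest:
// Illustrative wiring for the fields the test relies on (names assumed).
EncryptionKeyFactory keyFactory = new LocalKeyFactory();
BlockAllocator allocator = new BlockAllocatorImpl();
// The spill reader needs the same S3 client the handler spills with, plus an allocator for the Blocks it rehydrates.
S3BlockSpillReader spillReader = new S3BlockSpillReader(amazonS3, allocator);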
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class OracleMetadataHandler, method getPartitions.
/**
 * If the table has no partitions, the data is fetched with a single split.
 * If the table is partitioned, the partition information is fetched and one split is created per partition
 * for parallel processing.
 * @param blockWriter Used to write partition rows into the partitions Block.
 * @param getTableLayoutRequest Provides details of the catalog, schema, and table being queried.
 * @param queryStatusChecker Used to check whether the query is still running.
 */
@Override
public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.debug("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName().toUpperCase());
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
             ResultSet resultSet = preparedStatement.executeQuery()) {
            // Return a single partition if no partitions are defined
            if (!resultSet.next()) {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    LOGGER.info("Adding partition {}", ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            } else {
                do {
                    final String partitionName = resultSet.getString(PARTITION_COLUMN_NAME);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; an ORDER BY and LIMIT clause with offsets could be used here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                        LOGGER.debug("Adding partition {}", partitionName);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                } while (resultSet.next() && queryStatusChecker.isQueryRunning());
            }
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
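For the writeRows calls above to succeed, the partitions Block handed to getPartitions must already contain the BLOCK_PARTITION_COLUMN_NAME column. A minimal sketch of how that column is typically declared in the partition schema of a JDBC-style handler; the method shape is illustrative rather than Oracle-specific:
public Schema getPartitionSchema(final String catalogName) {
    // Declare the single string column that getPartitions() writes each partition name into.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder()
            .addStringField(BLOCK_PARTITION_COLUMN_NAME);
    return schemaBuilder.build();
}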
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class PostGreSqlMetadataHandler, method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    boolean splitterUsed = false;
    if (partitions.getRowCount() == 1) {
        FieldReader partitionsSchemaFieldReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
        partitionsSchemaFieldReader.setPosition(0);
        FieldReader partitionsFieldReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        partitionsFieldReader.setPosition(0);
        if (ALL_PARTITIONS.equals(partitionsSchemaFieldReader.readText().toString()) && ALL_PARTITIONS.equals(partitionsFieldReader.readText().toString())) {
            for (String splitClause : getSplitClauses(getSplitsRequest.getTableName())) {
                // Every split must have a unique location if we wish to spill to avoid failures
                SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
                Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                        .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(partitionsSchemaFieldReader.readText()))
                        .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(splitClause));
                splits.add(splitBuilder.build());
                if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
                    throw new RuntimeException("Max splits supported with splitter " + MAX_SPLITS_PER_REQUEST);
                }
                splitterUsed = true;
            }
        }
    }
    if (!splitterUsed) {
        for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
            FieldReader partitionsSchemaFieldReader = partitions.getFieldReader(BLOCK_PARTITION_SCHEMA_COLUMN_NAME);
            partitionsSchemaFieldReader.setPosition(curPartition);
            FieldReader partitionsFieldReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
            partitionsFieldReader.setPosition(curPartition);
            // Every split must have a unique location if we wish to spill to avoid failures
            SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
            LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), String.valueOf(partitionsFieldReader.readText()));
            Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, String.valueOf(partitionsSchemaFieldReader.readText()))
                    .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(partitionsFieldReader.readText()));
            splits.add(splitBuilder.build());
            if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
                // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
                return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
            }
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
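The single-row branch above relies on getSplitClauses to break an unpartitioned table into parallel reads, but that helper is not part of this snippet. A rough sketch of one way such clauses could be generated, assuming a fixed bucket count and a numeric id column to split on; the helper body and column choice are assumptions, not the connector's actual implementation:
private static final int SPLIT_BUCKET_COUNT = 4;

protected List<String> getSplitClauses(TableName tableName) {
    List<String> clauses = new ArrayList<>();
    // Each clause selects a disjoint slice of the table so the slices can be scanned in parallel.
    for (int bucket = 0; bucket < SPLIT_BUCKET_COUNT; bucket++) {
        clauses.add(String.format(" WHERE MOD(id, %d) = %d ", SPLIT_BUCKET_COUNT, bucket));
    }
    return clauses;
}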
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class RedisMetadataHandlerTest, method doGetTableLayout.
@Test
public void doGetTableLayout() throws Exception {
    Schema schema = SchemaBuilder.newBuilder().build();
    GetTableLayoutRequest req = new GetTableLayoutRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, new Constraints(new HashMap<>()), schema, new HashSet<>());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res);
    Block partitions = res.getPartitions();
    for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
    }
    assertTrue(partitions.getRowCount() > 0);
    assertEquals(7, partitions.getFields().size());
    logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
}
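The Redis layout returns a partitions Block with seven columns rather than a single partition-name column. A short sketch of inspecting one cell with the same FieldReader pattern used by the handlers above; the column name keyPrefix is only an example, not necessarily one of the seven fields:
// Position a reader on row 0 of an assumed partition column and read its value.
FieldReader reader = partitions.getFieldReader("keyPrefix");
reader.setPosition(0);
logger.info("first partition keyPrefix: {}", reader.readObject());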