Use of com.amazonaws.athena.connector.lambda.data.SimpleBlockWriter in the project aws-athena-query-federation by awslabs.
From the class MetadataHandler, method doGetTableLayout.
/**
 * Determines which partitions of the requested table have to be read in order to satisfy the query's predicate.
 *
 * @param allocator Tool for creating and managing Apache Arrow Blocks.
 * @param request Identifies the catalog, database, and table being queried and carries any filter predicate.
 * @return A GetTableLayoutResponse which primarily contains:
 * 1. An Apache Arrow Block holding 0 or more partitions to read. 0 partitions implies there are 0 rows to read.
 * 2. The {@code Set<String>} of partition column names, which should correspond to columns in that Apache Arrow Block.
 * @throws Exception if any step of building the layout fails.
 * @note Partitions are partially opaque to Amazon Athena: it only understands your partition columns and how to
 * filter out partitions that do not meet the query's constraints, and it must call doGetSplits(...) for each
 * partition you return in order to determine which reads to perform and whether they can be parallelized. Any
 * additional columns you add to the partition data are ignored by Athena but passed on to calls on GetSplits,
 * so most of this response's contents are for your own use rather than for Athena.
 */
public GetTableLayoutResponse doGetTableLayout(final BlockAllocator allocator, final GetTableLayoutRequest request) throws Exception {
    // The first builder describes only the partition columns and is what the constraint is evaluated against;
    // the second describes the partition Block that is returned and may be extended further below.
    SchemaBuilder pruningSchemaBuilder = new SchemaBuilder().newBuilder();
    SchemaBuilder layoutSchemaBuilder = new SchemaBuilder().newBuilder();

    // Register every partition column, with the type it has in the table schema, on both builders so the
    // engine knows how to interpret the list of partitions we are going to return.
    for (String columnName : request.getPartitionCols()) {
        Field tableField = request.getSchema().findField(columnName);
        layoutSchemaBuilder.addField(columnName, tableField.getType());
        pruningSchemaBuilder.addField(columnName, tableField.getType());
    }

    // Hook that may add further fields or metadata to the partition schema before it is inspected.
    enhancePartitionSchema(layoutSchemaBuilder, request);
    Schema layoutSchema = layoutSchemaBuilder.build();

    boolean describesLayout = !layoutSchema.getFields().isEmpty() || !layoutSchema.getCustomMetadata().isEmpty();
    if (!describesLayout) {
        // Even though this table has no complex layout, partitioning, or metadata, we still need to convey that
        // there is at least 1 partition to read, or Athena will assume partition pruning found no candidate
        // layouts to read.
        Block defaultPartition = BlockUtils.newBlock(allocator, PARTITION_ID_COL, Types.MinorType.INT.getType(), 1);
        return new GetTableLayoutResponse(request.getCatalogName(), request.getTableName(), defaultPartition);
    }

    // Use the constraint from the request for partition pruning: the evaluator is attached to the result Block
    // and getPartitions(...) then writes the candidate partitions through the block writer. Both the evaluator
    // and the query status checker are closed when this try block exits.
    try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, pruningSchemaBuilder.build(), request.getConstraints());
            QueryStatusChecker statusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId())) {
        Block layoutBlock = allocator.createBlock(layoutSchemaBuilder.build());
        layoutBlock.constrain(evaluator);
        getPartitions(new SimpleBlockWriter(layoutBlock), request, statusChecker);
        return new GetTableLayoutResponse(request.getCatalogName(), request.getTableName(), layoutBlock);
    }
}
Aggregations