Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class VerticaRecordHandler, method readWithConstraint.
/**
 * Used to read the row data associated with the provided Split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 * The BlockSpiller automatically handles chunking the response, encrypting, and spilling to S3.
 * @param recordsRequest Details of the read request, including:
 * 1. The Split
 * 2. The Catalog, Database, and Table the read request is for.
 * 3. The filtering predicate (if any)
 * 4. The columns required for projection.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 * @throws IOException If an I/O error occurs while reading the exported data from S3.
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws IOException {
    logger.info("readWithConstraint: schema[{}] tableName[{}]", recordsRequest.getSchema(), recordsRequest.getTableName());
    Schema schemaName = recordsRequest.getSchema();
    Split split = recordsRequest.getSplit();
    String id = split.getProperty("query_id");
    String exportBucket = split.getProperty("exportBucket");
    String s3ObjectKey = split.getProperty("s3ObjectKey");
    if (!s3ObjectKey.isEmpty()) {
        // Get the column names and types from the Schema.
        HashMap<String, Types.MinorType> mapOfNamesAndTypes = new HashMap<>();
        HashMap<String, Object> mapOfCols = new HashMap<>();
        for (Field field : schemaName.getFields()) {
            Types.MinorType minorTypeForArrowType = Types.getMinorTypeForArrowType(field.getType());
            mapOfNamesAndTypes.put(field.getName(), minorTypeForArrowType);
            mapOfCols.put(field.getName(), null);
        }
        // Create a RowContext to hold the column names and values.
        final RowContext rowContext = new RowContext(id);
        // Generate the RowWriter and Extractors.
        GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(recordsRequest.getConstraints());
        for (Field next : recordsRequest.getSchema().getFields()) {
            Extractor extractor = makeExtractor(next, mapOfNamesAndTypes, mapOfCols);
            builder.withExtractor(next.getName(), extractor);
        }
        GeneratedRowWriter rowWriter = builder.build();
        // Use S3 Select to read the S3 Parquet file generated in the split.
        // Create the read request.
        SelectObjectContentRequest request = generateBaseParquetRequest(exportBucket, s3ObjectKey);
        try (SelectObjectContentResult result = amazonS3.selectObjectContent(request)) {
            InputStream resultInputStream = result.getPayload().getRecordsInputStream();
            BufferedReader streamReader = new BufferedReader(new InputStreamReader(resultInputStream, StandardCharsets.UTF_8));
            String inputStr;
            while ((inputStr = streamReader.readLine()) != null) {
                // We read Parquet files but serialize the output as JSON, because the SDK provides a Parquet
                // InputSerialization but only a JSON or CSV OutputSerialization.
                ObjectMapper objectMapper = new ObjectMapper();
                HashMap<String, Object> map = objectMapper.readValue(inputStr, HashMap.class);
                rowContext.setNameValue(map);
                // Pass the RowContext to the BlockWriter.
                spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, rowContext) ? 1 : 0);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error in connecting to S3 and selecting the object content for object: " + s3ObjectKey, e);
        }
    }
}
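The helper generateBaseParquetRequest is referenced above but not shown. Below is a minimal sketch of what such a helper could look like, assuming it runs a plain SELECT * FROM S3Object over the exported Parquet object and asks for JSON output to match the Jackson parsing loop; it is an illustration, not the connector's actual implementation.
// Hypothetical sketch of generateBaseParquetRequest (uses com.amazonaws.services.s3.model classes):
// builds an S3 Select request that reads a Parquet object and emits the records as JSON lines.
private SelectObjectContentRequest generateBaseParquetRequest(String bucket, String key) {
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(bucket);
    request.setKey(key);
    request.setExpression("SELECT * FROM S3Object");
    request.setExpressionType(ExpressionType.SQL);
    // Input side: the exported file is Parquet.
    InputSerialization inputSerialization = new InputSerialization();
    inputSerialization.setParquet(new ParquetInput());
    request.setInputSerialization(inputSerialization);
    // Output side: S3 Select can only emit JSON or CSV, so request JSON and parse each line with Jackson.
    OutputSerialization outputSerialization = new OutputSerialization();
    outputSerialization.setJson(new JSONOutput());
    request.setOutputSerialization(outputSerialization);
    return request;
}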
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class TimestreamRecordHandler, method readWithConstraint.
/**
 * Scans Timestream.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    TableName tableName = recordsRequest.getTableName();
    SelectQueryBuilder queryBuilder = queryFactory.createSelectQueryBuilder(GlueMetadataHandler.VIEW_METADATA_FIELD);
    String query = queryBuilder.withDatabaseName(tableName.getSchemaName())
            .withTableName(tableName.getTableName())
            .withProjection(recordsRequest.getSchema())
            .withConjucts(recordsRequest.getConstraints())
            .build();
    logger.info("readWithConstraint: query[{}]", query);
    GeneratedRowWriter rowWriter = buildRowWriter(recordsRequest);
    String nextToken = null;
    long numRows = 0;
    do {
        QueryResult queryResult = tsQuery.query(new QueryRequest().withQueryString(query).withNextToken(nextToken));
        List<Row> data = queryResult.getRows();
        if (data != null) {
            numRows += data.size();
            for (Row nextRow : data) {
                spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, nextRow) ? 1 : 0);
            }
        }
        nextToken = queryResult.getNextToken();
        logger.info("readWithConstraint: numRows[{}]", numRows);
    } while (nextToken != null && !nextToken.isEmpty());
}
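buildRowWriter(recordsRequest) is referenced above but not shown. Below is a minimal sketch of what such a builder could look like, assuming every projected column is a VARCHAR and that the Datum order in each Timestream Row matches the projection order; the real handler supports additional Arrow types, so treat this purely as an illustration.
// Hypothetical sketch: registers one VarCharExtractor per schema field. The context object passed to
// writeRow above is the Timestream Row, so each extractor reads the Datum at its column's position.
private GeneratedRowWriter buildRowWriter(ReadRecordsRequest recordsRequest) {
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(recordsRequest.getConstraints());
    List<Field> fields = recordsRequest.getSchema().getFields();
    for (int i = 0; i < fields.size(); i++) {
        final int columnIndex = i;
        builder.withExtractor(fields.get(i).getName(), (VarCharExtractor) (Object context, NullableVarCharHolder dst) -> {
            // Assumption: row data is positional and every value is a scalar string.
            String value = ((Row) context).getData().get(columnIndex).getScalarValue();
            dst.isSet = (value == null) ? 0 : 1;
            dst.value = value;
        });
    }
    return builder.build();
}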
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class S3ObjectsTableProvider, method toRow.
/**
 * Maps an S3 ObjectSummary into a row in our Apache Arrow response block(s).
 *
 * @param objectSummary The S3 ObjectSummary to map.
 * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
 * @note The current implementation is rather naive in how it maps fields. It leverages a static
 * list of fields that we'd like to provide and then explicitly filters and converts each field.
 */
private void toRow(S3ObjectSummary objectSummary, BlockSpiller spiller) {
    spiller.writeRows((Block block, int row) -> {
        boolean matched = true;
        matched &= block.offerValue("bucket_name", row, objectSummary.getBucketName());
        matched &= block.offerValue("e_tag", row, objectSummary.getETag());
        matched &= block.offerValue("key", row, objectSummary.getKey());
        matched &= block.offerValue("bytes", row, objectSummary.getSize());
        matched &= block.offerValue("storage_class", row, objectSummary.getStorageClass());
        matched &= block.offerValue("last_modified", row, objectSummary.getLastModified());
        Owner owner = objectSummary.getOwner();
        if (owner != null) {
            matched &= block.offerValue("owner_name", row, owner.getDisplayName());
            matched &= block.offerValue("owner_id", row, owner.getId());
        }
        return matched ? 1 : 0;
    });
}
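For context, a caller of toRow typically pages through the bucket listing and feeds every summary to this method. Below is a minimal sketch of such a loop using ListObjectsV2; the method name readObjects and its parameters are illustrative, not part of the provider.
// Hypothetical sketch of a caller: pages through a bucket with ListObjectsV2 and funnels each
// S3ObjectSummary through toRow. Continuation tokens handle buckets with more than one page of keys.
private void readObjects(AmazonS3 amazonS3, String bucket, BlockSpiller spiller) {
    ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(bucket);
    ListObjectsV2Result result;
    do {
        result = amazonS3.listObjectsV2(request);
        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
            toRow(objectSummary, spiller);
        }
        // Resume from where the previous page stopped.
        request.setContinuationToken(result.getNextContinuationToken());
    } while (result.isTruncated());
}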
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class AbstractTableProviderTest, method validateRead.
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
    int blockNum = 0;
    int rowNum = 0;
    for (SpillLocation next : locations) {
        S3SpillLocation spillLocation = (S3SpillLocation) next;
        try (Block block = reader.read(spillLocation, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
            for (int i = 0; i < block.getRowCount(); i++) {
                logger.info("validateRead: {}", BlockUtils.rowToString(block, i));
                rowNum++;
                validateRow(block, i);
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    assertEquals(getExpectedRows(), rowNum);
}
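validateRow is implemented by each concrete table provider test. Below is a minimal sketch of what one implementation could look like, assuming we only assert that a few of the columns written by the S3 example above are present; the column names and checks are illustrative.
// Hypothetical sketch of a validateRow implementation: positions an Arrow FieldReader on the row
// and asserts the columns we expect to have been populated. Column names are illustrative.
protected void validateRow(Block block, int row) {
    FieldReader bucketReader = block.getFieldReader("bucket_name");
    bucketReader.setPosition(row);
    assertNotNull(bucketReader.readText());
    FieldReader keyReader = block.getFieldReader("key");
    keyReader.setPosition(row);
    assertNotNull(keyReader.readText());
    FieldReader sizeReader = block.getFieldReader("bytes");
    sizeReader.setPosition(row);
    assertTrue(sizeReader.readLong() >= 0);
}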
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class ImpalaMetadataHandler, method getPartitions.
/**
 * Used to get the Impala partitions that must be read from the request table in order to satisfy the requested predicate.
 *
 * @param blockWriter Used to write rows (Impala partitions) into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 * @throws SQLException A SQLException should be thrown for database connection failures, query syntax errors, and so on.
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws SQLException {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
            Statement stmt = connection.createStatement();
            PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + getTableLayoutRequest.getTableName().getTableName().toUpperCase())) {
        Map<String, String> columnHashMap = getMetadataForGivenTable(psmt);
        String tableType = columnHashMap.get("TableType");
        if (tableType == null) {
            ResultSet partitionRs = stmt.executeQuery("show files in " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
            Set<String> partition = new HashSet<>();
            while (partitionRs != null && partitionRs.next()) {
                String partitionString = partitionRs.getString("Partition");
                if (partitionString != null && !partitionString.isEmpty()) {
                    partition.add(partitionString);
                }
            }
            LOGGER.debug("isTablePartitioned: {}", !partition.isEmpty());
            if (!partition.isEmpty()) {
                addPartitions(partition, columnHashMap, blockWriter);
            } else {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(ImpalaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, ImpalaConstants.ALL_PARTITIONS);
                    return 1;
                });
            }
        } else {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(ImpalaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, ImpalaConstants.ALL_PARTITIONS);
                return 1;
            });
        }
    }
}
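addPartitions is referenced above but not shown. Below is a minimal sketch of what it could look like, assuming each partition string returned by "show files in" is written as one row into the partition column; the real handler also derives the partition column names and values from columnHashMap.
// Hypothetical sketch: writes one row per discovered partition string into the partition column,
// mirroring the writeRows pattern used above for the single ALL_PARTITIONS row.
private void addPartitions(Set<String> partitions, Map<String, String> columnHashMap, BlockWriter blockWriter) {
    for (String partitionValue : partitions) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(ImpalaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionValue);
            return 1;
        });
    }
}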