Use of com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter in project aws-athena-query-federation by awslabs.
The class JdbcRecordHandler, method readWithConstraint.
@Override
public void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Catalog: {}, table {}, splits {}", readRecordsRequest.getQueryId(), readRecordsRequest.getCatalogName(),
            readRecordsRequest.getTableName(), readRecordsRequest.getSplit().getProperties());
    try (Connection connection = this.jdbcConnectionFactory.getConnection(getCredentialProvider())) {
        // For consistency: auto-commit must be disabled to enable streaming for some database types.
        connection.setAutoCommit(false);
        try (PreparedStatement preparedStatement = buildSplitSql(connection, readRecordsRequest.getCatalogName(), readRecordsRequest.getTableName(),
                readRecordsRequest.getSchema(), readRecordsRequest.getConstraints(), readRecordsRequest.getSplit());
                ResultSet resultSet = preparedStatement.executeQuery()) {
            Map<String, String> partitionValues = readRecordsRequest.getSplit().getProperties();
            GeneratedRowWriter.RowWriterBuilder rowWriterBuilder = GeneratedRowWriter.newBuilder(readRecordsRequest.getConstraints());
            for (Field next : readRecordsRequest.getSchema().getFields()) {
                if (next.getType() instanceof ArrowType.List) {
                    rowWriterBuilder.withFieldWriterFactory(next.getName(), makeFactory(next));
                }
                else {
                    rowWriterBuilder.withExtractor(next.getName(), makeExtractor(next, resultSet, partitionValues));
                }
            }
            GeneratedRowWriter rowWriter = rowWriterBuilder.build();
            int rowsReturnedFromDatabase = 0;
            while (resultSet.next()) {
                if (!queryStatusChecker.isQueryRunning()) {
                    return;
                }
                blockSpiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, resultSet) ? 1 : 0);
                rowsReturnedFromDatabase++;
            }
            LOGGER.info("{} rows returned by database.", rowsReturnedFromDatabase);
            connection.commit();
        }
    }
    catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
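
The makeExtractor and makeFactory helpers referenced above are part of the connector and are not shown in this excerpt. As a rough sketch of the Extractor contract they satisfy (an illustration under assumed names, not the connector's actual implementation), a VARCHAR extractor that reads from the same streaming ResultSet might look like this:

// Sketch only: a VarCharExtractor bound to the shared ResultSet. Assumes the usual SDK
// imports (VarCharExtractor, NullableVarCharHolder, Extractor) plus java.sql.ResultSet;
// the helper name is hypothetical.
private Extractor makeVarCharExtractorSketch(Field field, ResultSet resultSet) {
    String columnName = field.getName();
    return (VarCharExtractor) (Object context, NullableVarCharHolder dst) -> {
        dst.value = resultSet.getString(columnName);  // reads the column from the ResultSet's current row
        dst.isSet = resultSet.wasNull() ? 0 : 1;      // mark SQL NULLs as not set so the row writer skips them
    };
}

GeneratedRowWriter invokes each registered extractor once per writeRow call, so the ResultSet's cursor position at that moment determines which database row is written into the Block.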
Use of com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter in project aws-athena-query-federation by awslabs.
The class VerticaRecordHandler, method readWithConstraint.
/**
 * Used to read the row data associated with the provided Split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 *                The BlockSpiller automatically handles chunking the response, encrypting, and spilling to S3.
 * @param recordsRequest Details of the read request, including:
 *                       1. The Split
 *                       2. The Catalog, Database, and Table the read request is for.
 *                       3. The filtering predicate (if any)
 *                       4. The columns required for projection.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 * @throws IOException if an I/O error occurs while reading the exported data from S3.
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws IOException {
    logger.info("readWithConstraint: schema[{}] tableName[{}]", recordsRequest.getSchema(), recordsRequest.getTableName());
    Schema schemaName = recordsRequest.getSchema();
    Split split = recordsRequest.getSplit();
    String id = split.getProperty("query_id");
    String exportBucket = split.getProperty("exportBucket");
    String s3ObjectKey = split.getProperty("s3ObjectKey");
    if (!s3ObjectKey.isEmpty()) {
        // Get each column's name and type from the Schema.
        HashMap<String, Types.MinorType> mapOfNamesAndTypes = new HashMap<>();
        HashMap<String, Object> mapOfCols = new HashMap<>();
        for (Field field : schemaName.getFields()) {
            Types.MinorType minorTypeForArrowType = Types.getMinorTypeForArrowType(field.getType());
            mapOfNamesAndTypes.put(field.getName(), minorTypeForArrowType);
            mapOfCols.put(field.getName(), null);
        }
        // Create a RowContext to hold the column names and values.
        final RowContext rowContext = new RowContext(id);
        // Generate the RowWriter and an Extractor per field.
        GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(recordsRequest.getConstraints());
        for (Field next : recordsRequest.getSchema().getFields()) {
            Extractor extractor = makeExtractor(next, mapOfNamesAndTypes, mapOfCols);
            builder.withExtractor(next.getName(), extractor);
        }
        GeneratedRowWriter rowWriter = builder.build();
        /*
         * Use S3 Select to read the S3 Parquet file generated in the split.
         */
        // Create the read request.
        SelectObjectContentRequest request = generateBaseParquetRequest(exportBucket, s3ObjectKey);
        try (SelectObjectContentResult result = amazonS3.selectObjectContent(request)) {
            InputStream resultInputStream = result.getPayload().getRecordsInputStream();
            BufferedReader streamReader = new BufferedReader(new InputStreamReader(resultInputStream, StandardCharsets.UTF_8));
            String inputStr;
            while ((inputStr = streamReader.readLine()) != null) {
                HashMap<String, Object> map = new HashMap<>();
                // We read Parquet files but serialize the output as JSON: the SDK provides a Parquet
                // InputSerialization but only a JSON or CSV OutputSerialization.
                ObjectMapper objectMapper = new ObjectMapper();
                map = objectMapper.readValue(inputStr, HashMap.class);
                rowContext.setNameValue(map);
                // Pass the RowContext to the BlockSpiller.
                spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, rowContext) ? 1 : 0);
            }
        }
        catch (Exception e) {
            throw new RuntimeException("Error in connecting to S3 and selecting the object content for object : " + s3ObjectKey, e);
        }
    }
}
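
The generateBaseParquetRequest helper is not shown here. Matching the comment in the loop (Parquet InputSerialization, JSON or CSV OutputSerialization only), a minimal sketch of such an S3 Select request with the v1 AWS SDK might look like the following; the helper name and the SELECT expression are assumptions for illustration:

// Sketch only (assumed shape, not the connector's exact helper): an S3 Select request
// that scans a Parquet export and emits each record as a JSON line, which the loop
// above then parses with Jackson's ObjectMapper.
private static SelectObjectContentRequest generateBaseParquetRequestSketch(String bucket, String key) {
    InputSerialization inputSerialization = new InputSerialization();
    inputSerialization.setParquet(new ParquetInput());        // the exported objects are Parquet

    OutputSerialization outputSerialization = new OutputSerialization();
    outputSerialization.setJson(new JSONOutput());            // S3 Select can only emit JSON or CSV

    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(bucket);
    request.setKey(key);
    request.setExpression("SELECT * FROM S3Object");          // assumed projection for illustration
    request.setExpressionType(ExpressionType.SQL);
    request.setInputSerialization(inputSerialization);
    request.setOutputSerialization(outputSerialization);
    return request;
}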
Use of com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter in project aws-athena-query-federation by awslabs.
The class TimestreamRecordHandler, method buildRowWriter.
private GeneratedRowWriter buildRowWriter(ReadRecordsRequest request) {
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    int fieldNum = 0;
    for (Field nextField : request.getSchema().getFields()) {
        int curFieldNum = fieldNum++;
        switch (Types.getMinorTypeForArrowType(nextField.getType())) {
            case VARCHAR:
                builder.withExtractor(nextField.getName(), (VarCharExtractor) (Object context, NullableVarCharHolder value) -> {
                    value.isSet = 1;
                    value.value = ((Row) context).getData().get(curFieldNum).getScalarValue();
                });
                break;
            case FLOAT8:
                builder.withExtractor(nextField.getName(), (Float8Extractor) (Object context, NullableFloat8Holder value) -> {
                    value.isSet = 1;
                    value.value = Double.valueOf(((Row) context).getData().get(curFieldNum).getScalarValue());
                });
                break;
            case BIT:
                builder.withExtractor(nextField.getName(), (BitExtractor) (Object context, NullableBitHolder value) -> {
                    value.isSet = 1;
                    value.value = Boolean.valueOf(((Row) context).getData().get(curFieldNum).getScalarValue()) == false ? 0 : 1;
                });
                break;
            case BIGINT:
                builder.withExtractor(nextField.getName(), (BigIntExtractor) (Object context, NullableBigIntHolder value) -> {
                    value.isSet = 1;
                    value.value = Long.valueOf(((Row) context).getData().get(curFieldNum).getScalarValue());
                });
                break;
            case DATEMILLI:
                builder.withExtractor(nextField.getName(), (DateMilliExtractor) (Object context, NullableDateMilliHolder value) -> {
                    value.isSet = 1;
                    value.value = TIMESTAMP_FORMATTER.parse(((Row) context).getData().get(curFieldNum).getScalarValue()).getTime();
                });
                break;
            case LIST:
                // TODO: This presently only supports TimeSeries results but it is possible that customers may
                // generate LIST type results for other reasons when using VIEWs. For now this seems like an OK
                // compromise since it enables an important capability of TimeStream even if it doesn't enable
                // arbitrary complex types.
                buildTimeSeriesExtractor(builder, nextField, curFieldNum);
                break;
            default:
                throw new RuntimeException("Unsupported field type[" + nextField.getType() + "] for field[" + nextField.getName() + "]");
        }
    }
    return builder.build();
}
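
The numeric cases above parse getScalarValue() directly, which assumes the scalar is never null for those columns. Purely as a sketch of an alternative (not the connector's code), a null-tolerant variant of the BIGINT lambda, reusing the loop's nextField and curFieldNum, would mark missing values as not set instead of throwing:

// Sketch only: null-tolerant BIGINT extractor for the loop above. If the scalar value
// happens to be null, flag the holder as not set rather than letting Long.parseLong
// throw a NumberFormatException.
builder.withExtractor(nextField.getName(), (BigIntExtractor) (Object context, NullableBigIntHolder value) -> {
    String scalar = ((Row) context).getData().get(curFieldNum).getScalarValue();
    if (scalar == null) {
        value.isSet = 0;
        return;
    }
    value.isSet = 1;
    value.value = Long.parseLong(scalar);
});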
Use of com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter in project aws-athena-query-federation by awslabs.
The class TimestreamRecordHandler, method readWithConstraint.
/**
 * Scans TimeStream.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    TableName tableName = recordsRequest.getTableName();
    SelectQueryBuilder queryBuilder = queryFactory.createSelectQueryBuilder(GlueMetadataHandler.VIEW_METADATA_FIELD);
    String query = queryBuilder.withDatabaseName(tableName.getSchemaName())
            .withTableName(tableName.getTableName())
            .withProjection(recordsRequest.getSchema())
            .withConjucts(recordsRequest.getConstraints())
            .build();
    logger.info("readWithConstraint: query[{}]", query);
    GeneratedRowWriter rowWriter = buildRowWriter(recordsRequest);
    String nextToken = null;
    long numRows = 0;
    do {
        QueryResult queryResult = tsQuery.query(new QueryRequest().withQueryString(query).withNextToken(nextToken));
        List<Row> data = queryResult.getRows();
        if (data != null) {
            numRows += data.size();
            for (Row nextRow : data) {
                spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, nextRow) ? 1 : 0);
            }
        }
        nextToken = queryResult.getNextToken();
        logger.info("readWithConstraint: numRows[{}]", numRows);
    } while (nextToken != null && !nextToken.isEmpty());
}
Use of com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter in project aws-athena-query-federation by awslabs.
The class ElasticsearchRecordHandler, method readWithConstraint.
/**
 * Used to read the row data associated with the provided Split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 *                The BlockSpiller automatically handles chunking the response, encrypting, and spilling to S3.
 * @param recordsRequest Details of the read request, including:
 *                       1. The Split
 *                       2. The Catalog, Database, and Table the read request is for.
 *                       3. The filtering predicate (if any)
 *                       4. The columns required for projection.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 * @throws RuntimeException when an error occurs while attempting to send the query, or when the query times out.
 * @note Avoid writing >10 rows per call to BlockSpiller.writeRow(...) because this will limit the BlockSpiller's
 *       ability to control Block size. The resulting increase in Block size may cause failures and reduced performance.
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws RuntimeException {
    logger.info("readWithConstraint - enter - Domain: {}, Index: {}, Mapping: {}", recordsRequest.getTableName().getSchemaName(),
            recordsRequest.getTableName().getTableName(), recordsRequest.getSchema());
    String domain = recordsRequest.getTableName().getSchemaName();
    String endpoint = recordsRequest.getSplit().getProperty(domain);
    String index = recordsRequest.getTableName().getTableName();
    String shard = recordsRequest.getSplit().getProperty(ElasticsearchMetadataHandler.SHARD_KEY);
    long numRows = 0;
    if (queryStatusChecker.isQueryRunning()) {
        AwsRestHighLevelClient client = clientFactory.getOrCreateClient(endpoint);
        try {
            // Create field extractors for all data types in the schema.
            GeneratedRowWriter rowWriter = createFieldExtractors(recordsRequest);
            // Create a new search-source injected with the projection, predicate, and the pagination batch size.
            SearchSourceBuilder searchSource = new SearchSourceBuilder()
                    .size(QUERY_BATCH_SIZE)
                    .timeout(new TimeValue(queryTimeout, TimeUnit.SECONDS))
                    .fetchSource(ElasticsearchQueryUtils.getProjection(recordsRequest.getSchema()))
                    .query(ElasticsearchQueryUtils.getQuery(recordsRequest.getConstraints().getSummary()));
            // Create a new search-request for the specified index.
            SearchRequest searchRequest = new SearchRequest(index).preference(shard);
            int hitsNum;
            int currPosition = 0;
            do {
                // Process the search request injecting the search-source, and setting the from position
                // used for pagination of results.
                SearchResponse searchResponse = client.getDocuments(searchRequest.source(searchSource.from(currPosition)));
                // Throw on query timeout.
                if (searchResponse.isTimedOut()) {
                    throw new RuntimeException("Request for index (" + index + ") " + shard + " timed out.");
                }
                // Increment current position to next batch of results.
                currPosition += QUERY_BATCH_SIZE;
                // Process hits.
                Iterator<SearchHit> hitIterator = searchResponse.getHits().iterator();
                hitsNum = searchResponse.getHits().getHits().length;
                while (hitIterator.hasNext() && queryStatusChecker.isQueryRunning()) {
                    ++numRows;
                    spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, client.getDocument(hitIterator.next())) ? 1 : 0);
                }
                // If hitsNum < QUERY_BATCH_SIZE, then this is the last batch of documents.
            } while (hitsNum == QUERY_BATCH_SIZE && queryStatusChecker.isQueryRunning());
        }
        catch (IOException error) {
            throw new RuntimeException("Error sending search query: " + error.getMessage(), error);
        }
    }
    logger.info("readWithConstraint: numRows[{}]", numRows);
}
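
Here client.getDocument(...) converts each SearchHit into the context object that the GeneratedRowWriter's extractors read from. Its implementation is not shown; as an assumption about its general shape (not the actual AwsRestHighLevelClient code), it could simply expose the hit's parsed _source:

// Hypothetical sketch of a getDocument-style helper: copy the hit's _source into a map
// keyed by field name so extractors can look values up by column name. The "_id" entry
// is an assumed extra, not necessarily part of the real implementation.
private Map<String, Object> getDocumentSketch(SearchHit hit) {
    Map<String, Object> document = new HashMap<>(hit.getSourceAsMap());
    document.put("_id", hit.getId());
    return document;
}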