Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
Class MySqlMetadataHandler, method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use a hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
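The continuation-token helpers referenced above are not part of the snippet. A minimal sketch, assuming the token is simply the index of the next partition row serialized as a string (names follow the snippet; the real implementations may differ):

private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume from the partition index carried in the token.
        return Integer.parseInt(request.getContinuationToken());
    }
    // No token means this is the first request; start at partition 0.
    return 0;
}

private String encodeContinuationToken(int partition) {
    return String.valueOf(partition);
}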
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
Class MySqlMetadataHandler, method getPartitions.
@Override
public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        final String escape = connection.getMetaData().getSearchStringEscape();
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
                ResultSet resultSet = preparedStatement.executeQuery()) {
            // Return a single partition if the table has no partitions defined.
            if (!resultSet.next()) {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    LOGGER.info("Adding partition {}", ALL_PARTITIONS);
                    // We wrote 1 row, so we return 1.
                    return 1;
                });
            } else {
                do {
                    final String partitionName = resultSet.getString(PARTITION_COLUMN_NAME);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; an ORDER BY with LIMIT and OFFSET could be used here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                        LOGGER.info("Adding partition {}", partitionName);
                        // We wrote 1 row, so we return 1.
                        return 1;
                    });
                } while (resultSet.next() && queryStatusChecker.isQueryRunning());
            }
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
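The constants referenced above are defined elsewhere in MySqlMetadataHandler. An illustrative sketch of their likely shape, assuming MySQL partitions are listed via INFORMATION_SCHEMA (the values here are assumptions, not copied from the project):

// Illustrative only: query shape and column names are assumptions based on the parameter order above.
static final String GET_PARTITIONS_QUERY = "SELECT DISTINCT partition_name FROM INFORMATION_SCHEMA.PARTITIONS "
        + "WHERE table_name = ? AND table_schema = ?";
static final String PARTITION_COLUMN_NAME = "partition_name";
static final String BLOCK_PARTITION_COLUMN_NAME = "partition_name";
static final String ALL_PARTITIONS = "*";

Note the query's parameter order matches the Arrays.asList(tableName, schemaName) call in the method.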
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
Class HiveMetadataHandler, method getPartitions.
/**
 * Used to get the Hive partitions that must be read from the requested table in order to satisfy the requested predicate.
 *
 * @param blockWriter Used to write rows (Hive partitions) into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 * @throws SQLException A SQLException should be thrown for database connection failures, query syntax errors, and so on.
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws SQLException {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
            Statement stmt = connection.createStatement();
            PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + getTableLayoutRequest.getTableName().getTableName().toUpperCase())) {
        boolean isTablePartitioned = false;
        ResultSet partitionResultset = stmt.executeQuery("show table extended like " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
        while (partitionResultset != null && partitionResultset.next()) {
            String partExists = partitionResultset.getString(1);
            if (partExists.toUpperCase().contains("PARTITIONED")) {
                String partValue = partExists.split(":")[1];
                if (partValue.toUpperCase().contains("TRUE")) {
                    isTablePartitioned = true;
                }
            }
        }
        LOGGER.debug("isTablePartitioned: {}", isTablePartitioned);
        if (isTablePartitioned) {
            ResultSet partitionRs = stmt.executeQuery("show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
            Set<String> partition = new HashSet<>();
            while (partitionRs != null && partitionRs.next()) {
                partition.add(partitionRs.getString("Partition"));
            }
            if (!partition.isEmpty()) {
                Map<String, String> columnHashMap = getMetadataForGivenTable(psmt);
                addPartitions(partition, columnHashMap, blockWriter);
            }
        } else {
            // Table is not partitioned; write a single "all partitions" row.
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, HiveConstants.ALL_PARTITIONS);
                return 1;
            });
        }
    }
}
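The addPartitions helper is not shown in this snippet. A hypothetical sketch of what it could do, writing one block row per Hive partition string and ignoring the column metadata map for simplicity (the real helper may use it to normalize partition values):

private void addPartitions(Set<String> partitionNames, Map<String, String> columnMetadata, BlockWriter blockWriter) {
    for (final String partitionName : partitionNames) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            // One row per partition string; columnMetadata is ignored in this sketch.
            block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
            return 1;
        });
    }
}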
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
Class OracleMetadataHandler, method doGetSplits.
/**
 * Builds the Splits for a given table layout, one Split per partition row, paging with a continuation token.
 *
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the partitions (as a Block) that need to be turned into Splits.
 * @return A GetSplitsResponse containing the generated Splits and, if more partitions remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO consider splitting further depending on #rows or data size. Could use a hash key for splitting if no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
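On the read side, the partition name stored in each Split can be recovered with Split#getProperty. A hedged usage sketch (the SQL fragment is illustrative only, not the project's actual query builder):

// Inside a RecordHandler's read path, assuming the same BLOCK_PARTITION_COLUMN_NAME constant.
Split split = readRecordsRequest.getSplit();
String partitionName = split.getProperty(BLOCK_PARTITION_COLUMN_NAME);
// The partition name could then be folded into the generated query,
// e.g. appending " PARTITION (" + partitionName + ")" for a partitioned table.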
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
Class PropertyGraphHandler, method executeQuery.
/**
 * Used to read the row data associated with the provided Split.
 *
 * @param spiller            A BlockSpiller that should be used to write the row data associated with this Split.
 *                           The BlockSpiller automatically handles chunking the response, encrypting, and spilling to S3.
 * @param recordsRequest     Details of the read request, including:
 *                           1. The Split.
 *                           2. The Catalog, Database, and Table the read request is for.
 *                           3. The filtering predicate (if any).
 *                           4. The columns required for projection.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 * @throws Exception Thrown if building or executing the traversal, or writing rows, fails.
 * @note Avoid writing >10 rows per call to BlockSpiller.writeRows(...) because this will limit the BlockSpiller's
 *       ability to control Block size. The resulting increase in Block size may cause failures and reduced performance.
 */
public void executeQuery(ReadRecordsRequest recordsRequest, final QueryStatusChecker queryStatusChecker, final BlockSpiller spiller) throws Exception {
    logger.debug("readWithConstraint: enter - " + recordsRequest.getSplit());
    long numRows = 0;
    Client client = neptuneConnection.getNeptuneClientConnection();
    GraphTraversalSource graphTraversalSource = neptuneConnection.getTraversalSource(client);
    GraphTraversal graphTraversal = null;
    String labelName = recordsRequest.getTableName().getTableName();
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(recordsRequest.getConstraints());
    String type = recordsRequest.getSchema().getCustomMetadata().get("componenttype");
    TableSchemaMetaType tableSchemaMetaType = TableSchemaMetaType.valueOf(type.toUpperCase());
    logger.debug("readWithConstraint: schema type is " + tableSchemaMetaType.toString());
    if (tableSchemaMetaType != null) {
        switch (tableSchemaMetaType) {
            case VERTEX:
                graphTraversal = graphTraversalSource.V().hasLabel(labelName);
                getQueryPartForContraintsMap(graphTraversal, recordsRequest);
                graphTraversal = graphTraversal.valueMap().with(WithOptions.tokens);
                for (final Field nextField : recordsRequest.getSchema().getFields()) {
                    VertexRowWriter.writeRowTemplate(builder, nextField);
                }
                break;
            case EDGE:
                graphTraversal = graphTraversalSource.E().hasLabel(labelName);
                getQueryPartForContraintsMap(graphTraversal, recordsRequest);
                graphTraversal = graphTraversal.elementMap();
                for (final Field nextField : recordsRequest.getSchema().getFields()) {
                    EdgeRowWriter.writeRowTemplate(builder, nextField);
                }
                break;
        }
    }
    // Log the string equivalent of the generated Gremlin query.
    logger.debug("readWithConstraint: enter - " + GroovyTranslator.of("g").translate(graphTraversal.asAdmin().getBytecode()));
    final GraphTraversal graphTraversalFinal1 = graphTraversal;
    final GeneratedRowWriter rowWriter1 = builder.build();
    while (graphTraversalFinal1.hasNext() && queryStatusChecker.isQueryRunning()) {
        numRows++;
        spiller.writeRows((final Block block, final int rowNum) -> {
            final Map obj = (Map) graphTraversalFinal1.next();
            return (rowWriter1.writeRow(block, rowNum, (Object) obj) ? 1 : 0);
        });
    }
    logger.info("readWithConstraint: numRows[{}]", numRows);
}
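For reference, the VERTEX branch above builds a traversal roughly equivalent to the following hand-written Gremlin. The label and predicate are placeholders; the real constraints come from getQueryPartForContraintsMap:

GraphTraversalSource g = neptuneConnection.getTraversalSource(neptuneConnection.getNeptuneClientConnection());
GraphTraversal<?, Map<Object, Object>> traversal = g.V()
        .hasLabel("airport")                  // labelName taken from the requested table name (example value)
        .has("country", "US")                 // example pushed-down constraint
        .valueMap().with(WithOptions.tokens); // include id and label tokens alongside the property values
while (traversal.hasNext()) {
    Map<Object, Object> row = traversal.next();
    // Each row is handed to the GeneratedRowWriter and written into an Arrow Block, one row at a time.
}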