use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class PostGreSqlMetadataHandler method getPartitions.
@Override
public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) {
    LOGGER.info("{}: Catalog {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
                ResultSet resultSet = preparedStatement.executeQuery()) {
            // Return a single partition if no partitions are defined
            if (!resultSet.next()) {
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
                    // We wrote 1 row, so return 1
                    return 1;
                });
            } else {
                do {
                    final String partitionSchemaName = resultSet.getString(PARTITION_SCHEMA_NAME);
                    final String partitionName = resultSet.getString(PARTITION_NAME);
                    // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                    // 2. This API is not paginated; ORDER BY with LIMIT/OFFSET could be used here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, rowNum, partitionSchemaName);
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                        // We wrote 1 row, so return 1
                        return 1;
                    });
                } while (resultSet.next());
            }
        }
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
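The two partition columns written above (BLOCK_PARTITION_SCHEMA_COLUMN_NAME and BLOCK_PARTITION_COLUMN_NAME) must also be declared in the table layout's partition schema. That declaration is not shown on this page; a minimal sketch of how it is typically done with the SDK's SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder) looks like this:
// Sketch of the partition schema assumed by the getPartitions code above.
// Schema is org.apache.arrow.vector.types.pojo.Schema; Types is org.apache.arrow.vector.types.Types.
// Both partition columns are plain VARCHAR fields.
@Override
public Schema getPartitionSchema(final String catalogName) {
    return SchemaBuilder.newBuilder()
            .addField(BLOCK_PARTITION_SCHEMA_COLUMN_NAME, Types.MinorType.VARCHAR.getType())
            .addField(BLOCK_PARTITION_COLUMN_NAME, Types.MinorType.VARCHAR.getType())
            .build();
}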
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class SaphanaMetadataHandler method getPartitions.
/**
 * We first check whether the input table is a view. A view has no partition info, so its data
 * is fetched with a single split. A table with no partitions is likewise fetched with a single
 * split. For a partitioned table we fetch the partition info and create one split per partition
 * for parallel processing.
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.debug("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    // Check whether the input table is a view
    boolean viewFlag = false;
    List<String> viewparameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(SaphanaConstants.VIEW_CHECK_QUERY).withParameters(viewparameters).build();
                ResultSet resultSet = preparedStatement.executeQuery()) {
            if (resultSet.next()) {
                viewFlag = true;
            }
            LOGGER.debug("viewFlag: {}", viewFlag);
        } catch (SQLException sqlException) {
            LOGGER.debug("Exception while querying view details for view {}", getTableLayoutRequest.getTableName().getTableName());
            throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        }
    }
    // For a view, create a single split
    if (viewFlag) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, SaphanaConstants.ALL_PARTITIONS);
            return 1;
        });
    } else {
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
        try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
            try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(SaphanaConstants.GET_PARTITIONS_QUERY).withParameters(parameters).build();
                    ResultSet resultSet = preparedStatement.executeQuery()) {
                // Return a single partition if no partitions are defined
                if (!resultSet.next()) {
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, SaphanaConstants.ALL_PARTITIONS);
                        // We wrote 1 row, so return 1
                        return 1;
                    });
                } else {
                    do {
                        final String partitionName = resultSet.getString(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
                        // 1. Returns all partitions of the table; constraint push-down to filter partitions is not supported.
                        // 2. This API is not paginated; ORDER BY with LIMIT/OFFSET could be used here.
                        blockWriter.writeRows((Block block, int rowNum) -> {
                            block.setValue(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
                            // We wrote 1 row, so return 1
                            return 1;
                        });
                    } while (resultSet.next());
                }
            }
        } catch (SQLException sqlException) {
            throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        }
    }
}
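SaphanaConstants.VIEW_CHECK_QUERY itself is not shown on this page. A plausible shape for it, probing SAP HANA's SYS.VIEWS catalog view with the same (schema, view) parameter order used above, is sketched below; the connector's actual constant may differ.
// Assumption: the viewFlag check above is backed by a catalog lookup of roughly this shape.
// SYS.VIEWS is SAP HANA's system view listing all views by SCHEMA_NAME and VIEW_NAME.
static final String VIEW_CHECK_QUERY =
        "SELECT 1 FROM SYS.VIEWS WHERE SCHEMA_NAME = ? AND VIEW_NAME = ?";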
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class SaphanaMetadataHandler method doGetSplits.
/**
 * @param blockAllocator
 * @param getSplitsRequest
 * @return
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= SaphanaConstants.MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
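doGetSplits pages through partitions using decodeContinuationToken and encodeContinuationToken, which are not shown on this page. In the JDBC-based connectors these helpers are typically a simple integer round-trip over the request's continuation token; a sketch under that assumption follows (the same pattern applies to the Snowflake doGetSplits further below).
// Sketch of the continuation-token helpers assumed by doGetSplits above.
private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume from the partition index encoded in the previous response
        return Integer.parseInt(request.getContinuationToken());
    }
    // No continuation token: start from the first partition
    return 0;
}

private String encodeContinuationToken(int partition) {
    return String.valueOf(partition);
}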
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class SnowflakeMetadataHandler method getPartitions.
/**
 * Snowflake manual partition logic based upon the number of records.
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    Map<String, String> properties = System.getenv();
    /**
     * Customized environment variable "pagecount" for pagination-based partitioning. It is currently set to 500000,
     * meaning there will be 500000 rows per partition. The number of partitions is the total number of rows divided
     * by the pagecount value.
     */
    String pagecount = properties.get("pagecount");
    Long totalpagecount = Long.valueOf(pagecount);
    /**
     * Customized environment variable "partitionlimit" to cap the number of partitions.
     * This guards against timeouts caused by a huge number of partitions.
     */
    String partitionlimit = properties.get("partitionlimit");
    Long totalPartitionlimit = Long.valueOf(partitionlimit);
    LOGGER.info("Total Partition Limit: {}", totalPartitionlimit);
    LOGGER.info("Total Page Count: {}", totalpagecount);
    long offset = 0;
    double limit = 0;
    double totalRecordCount = 0;
    boolean viewFlag = checkForView(getTableLayoutRequest);
    // If the input table is a view, there will be a single split
    if (viewFlag) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS);
            return 1;
        });
    } else {
        LOGGER.info(COUNT_RECORDS_QUERY);
        List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName());
        try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
                PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(COUNT_RECORDS_QUERY).withParameters(parameters).build();
                ResultSet rs = preparedStatement.executeQuery()) {
            while (rs.next()) {
                totalRecordCount = rs.getInt(1);
            }
            double limitValue = totalRecordCount / totalpagecount;
            limit = (int) Math.ceil(limitValue);
            if (totalRecordCount > 0) {
                // If the computed number of pages exceeds the partition limit, treat the whole table as a single partition.
                if (limit > totalPartitionlimit) {
                    final String partitionVal = BLOCK_PARTITION_COLUMN_NAME + "-limit-" + totalRecordCount + "-offset-" + offset;
                    LOGGER.info("partitionVal {} ", partitionVal);
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionVal);
                        return 1;
                    });
                } else {
                    /**
                     * Custom pagination-based partition logic is applied with LIMIT and OFFSET clauses.
                     * The partition value encodes the limit and offset, e.g. p-limit-3000-offset-0.
                     */
                    for (int i = 1; i <= limit; i++) {
                        if (i > 1) {
                            offset = offset + totalpagecount;
                        }
                        final String partitionVal = BLOCK_PARTITION_COLUMN_NAME + "-limit-" + pagecount + "-offset-" + offset;
                        LOGGER.info("partitionVal {} ", partitionVal);
                        blockWriter.writeRows((Block block, int rowNum) -> {
                            block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionVal);
                            return 1;
                        });
                    }
                }
            } else {
                LOGGER.info("No Records Found for table {}", getTableLayoutRequest.getTableName().getTableName());
            }
        } catch (SQLException sqlException) {
            throw new RuntimeException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
        } catch (Exception exception) {
            LOGGER.error("Error occurred while getting the results", exception);
        }
    }
}
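Each partition value written above encodes one page of the table as a limit/offset pair (for example p-limit-500000-offset-0). On the read side, that string has to be turned back into a LIMIT ... OFFSET ... clause; the sketch below illustrates one way to parse it. The helper name is hypothetical and is not the connector's actual code.
// Hypothetical helper: recover the LIMIT/OFFSET clause from a partition value
// such as "p-limit-500000-offset-0". Not the connector's actual implementation.
private static String buildLimitOffsetClause(String partitionValue) {
    int limitIdx = partitionValue.indexOf("-limit-");
    int offsetIdx = partitionValue.indexOf("-offset-");
    // Everything between "-limit-" and "-offset-" is the page size; the remainder is the starting row.
    String limit = partitionValue.substring(limitIdx + "-limit-".length(), offsetIdx);
    String offset = partitionValue.substring(offsetIdx + "-offset-".length());
    return " LIMIT " + limit + " OFFSET " + offset;
}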
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class SnowflakeMetadataHandler method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey()).add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return and provide a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
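The partition string attached to each Split above is what the Snowflake record handler later reads back to scope its query. A minimal sketch of that read-back follows; the helper is hypothetical (it is not the connector's actual code) and reuses the buildLimitOffsetClause sketch shown earlier.
// Hypothetical helper illustrating how a record handler could consume the splits built above.
// Split is com.amazonaws.athena.connector.lambda.domain.Split.
private String applyPartition(String baseSql, Split split) {
    String partitionValue = split.getProperty(BLOCK_PARTITION_COLUMN_NAME);
    if (partitionValue == null || ALL_PARTITIONS.equals(partitionValue)) {
        // View or single-partition case: no paging clause needed
        return baseSql;
    }
    // e.g. "p-limit-500000-offset-0" -> append the matching LIMIT/OFFSET clause
    return baseSql + buildLimitOffsetClause(partitionValue);
}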