Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
The class MySqlMetadataHandler, method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return them along with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
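The continuation-token helpers are not shown above. A minimal sketch, assuming the token is simply the next partition index serialized as a string, which is how the JDBC-based connectors in this project typically implement it:

private int decodeContinuationToken(GetSplitsRequest request) {
    if (request.hasContinuationToken()) {
        // Resume from the partition index encoded by the previous response.
        return Integer.parseInt(request.getContinuationToken());
    }
    // No token: this is the first doGetSplits call for the query.
    return 0;
}

private String encodeContinuationToken(int partition) {
    return String.valueOf(partition);
}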
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
The class OracleMetadataHandler, method doGetSplits.
/**
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, table, and partition(s) being queried, as well as any filter predicate.
 * @return A GetSplitsResponse containing a batch of splits and, if more partitions remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(final BlockAllocator blockAllocator, final GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return them along with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
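makeSpillLocation comes from the connector SDK's MetadataHandler base class and returns a unique location per call, which is why each split gets its own SpillLocation. A hedged sketch of what such a method looks like, assuming S3-backed spill; the spillBucket and spillPrefix fields are illustrative configuration values, not taken from this snippet:

// Sketch only: each call yields a unique S3 location so concurrent splits
// never overwrite each other's spill files.
protected SpillLocation makeSpillLocation(GetSplitsRequest request) {
    return S3SpillLocation.newBuilder()
            .withBucket(spillBucket)    // assumed field, e.g. from the spill bucket setting
            .withPrefix(spillPrefix)    // assumed field, e.g. from the spill prefix setting
            .withQueryId(request.getQueryId())
            .withSplitId(UUID.randomUUID().toString())  // guarantees uniqueness per split
            .withIsDirectory(true)
            .build();
}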
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
The class RedisMetadataHandler, method makeSplits.
/**
 * For a given key prefix, this method attempts to break up all the matching keys into N buckets (aka N splits).
 *
 * @param request The GetSplitsRequest being processed.
 * @param syncCommands The Lettuce client used to issue Redis commands.
 * @param endpoint The Redis endpoint to query.
 * @param keyPrefix The key prefix to scan.
 * @param keyType The KeyType (prefix or zset).
 * @param valueType The ValueType, used for mapping the values stored at each key to a result row when the split is processed.
 * @param sslEnabled The value from the REDIS_SSL_FLAG on the table being queried.
 * @param isCluster The value from the REDIS_CLUSTER_FLAG on the table being queried.
 * @param dbNumber The value from the REDIS_DB_NUMBER on the table being queried.
 * @return A Set of splits to optionally parallelize reading the values associated with the keyPrefix.
 */
private Set<Split> makeSplits(GetSplitsRequest request, RedisCommandsWrapper<String, String> syncCommands, String endpoint, String keyPrefix, KeyType keyType, String valueType, boolean sslEnabled, boolean isCluster, String dbNumber) {
    Set<Split> splits = new HashSet<>();
    long numberOfKeys = 1;
    if (keyType == KeyType.ZSET) {
        numberOfKeys = syncCommands.zcount(keyPrefix, Range.unbounded());
        logger.info("makeSplits: ZCOUNT[{}] found [{}]", keyPrefix, numberOfKeys);
    }
    long stride = (numberOfKeys > REDIS_MAX_SPLITS) ? 1 + (numberOfKeys / REDIS_MAX_SPLITS) : numberOfKeys;
    for (long startIndex = 0; startIndex < numberOfKeys; startIndex += stride) {
        long endIndex = startIndex + stride - 1;
        if (endIndex >= numberOfKeys) {
            endIndex = -1;
        }
        // Every split must have a unique spill location, otherwise spilling can fail.
        SpillLocation spillLocation = makeSpillLocation(request);
        Split split = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(KEY_PREFIX_TABLE_PROP, keyPrefix)
                .add(KEY_TYPE, keyType.getId())
                .add(VALUE_TYPE_TABLE_PROP, valueType)
                .add(REDIS_ENDPOINT_PROP, endpoint)
                .add(SPLIT_START_INDEX, String.valueOf(startIndex))
                .add(SPLIT_END_INDEX, String.valueOf(endIndex))
                .add(REDIS_SSL_FLAG, String.valueOf(sslEnabled))
                .add(REDIS_CLUSTER_FLAG, String.valueOf(isCluster))
                .add(REDIS_DB_NUMBER, dbNumber)
                .build();
        splits.add(split);
        logger.info("makeSplits: Split[{}]", split);
    }
    return splits;
}
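To make the bucketing arithmetic concrete, here is a small worked example; the key count and the maximum split count are illustrative values, not taken from the connector:

// Illustrative numbers: 25,000 matching keys, at most 10 splits.
long numberOfKeys = 25_000;
long maxSplits = 10;
long stride = (numberOfKeys > maxSplits) ? 1 + (numberOfKeys / maxSplits) : numberOfKeys;  // 2501
for (long startIndex = 0; startIndex < numberOfKeys; startIndex += stride) {
    long endIndex = startIndex + stride - 1;
    if (endIndex >= numberOfKeys) {
        endIndex = -1;  // sentinel meaning "read to the end of the sorted set"
    }
    System.out.printf("split [%d, %d]%n", startIndex, endIndex);
}
// Prints 10 ranges: [0, 2500], [2501, 5001], ..., [22509, -1]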
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
The class SaphanaMetadataHandler, method doGetSplits.
/**
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, table, and partition(s) being queried, as well as any filter predicate.
 * @return A GetSplitsResponse containing a batch of splits and, if more partitions remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.debug("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= SaphanaConstants.MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return them along with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
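On the read side, the corresponding RecordHandler recovers the partition name from the split's properties. A minimal sketch of that hand-off; partitionFromSplit is a hypothetical helper, while Split.getProperty is part of the SDK:

// Hypothetical helper showing how a RecordHandler reads back the value that
// doGetSplits stored on the Split via Split.Builder.add(...).
private String partitionFromSplit(Split split) {
    return split.getProperty(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME);
}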
Use of com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation in project aws-athena-query-federation by awslabs.
The class SnowflakeMetadataHandler, method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on the number of rows or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return them along with a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
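All five handlers share the same pagination contract: callers keep invoking doGetSplits, feeding each response's continuation token back into the next request, until the token is null. A hedged sketch of that loop; newRequestWithToken is a hypothetical helper, since in production the Athena engine re-invokes the Lambda with the token automatically:

// Hypothetical driver loop illustrating the continuation-token contract.
Set<Split> allSplits = new HashSet<>();
GetSplitsRequest request = initialRequest;  // assumed to be built elsewhere
GetSplitsResponse response;
do {
    response = handler.doGetSplits(allocator, request);
    allSplits.addAll(response.getSplits());
    // Rebuild the request carrying the token forward (hypothetical helper).
    request = newRequestWithToken(request, response.getContinuationToken());
} while (response.getContinuationToken() != null);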