Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class RedisRecordHandler, method loadLiteralRow.
private void loadLiteralRow(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList)
{
    spiller.writeRows((Block block, int row) -> {
        if (fieldList.size() != 1) {
            throw new RuntimeException("Ambiguous field mapping, more than 1 field for literal value type.");
        }
        Field field = fieldList.get(0);
        Object value = ValueConverter.convert(field, syncCommands.get(keyString));
        boolean literalMatched = block.offerValue(KEY_COLUMN_NAME, row, keyString);
        literalMatched &= block.offerValue(field.getName(), row, value);
        return literalMatched ? 1 : 0;
    });
}
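The writeRows contract is worth spelling out: the lambda returns the number of rows it actually wrote, and Block.offerValue returns false when a value fails the query's constraints, which is why the method above returns 0 to drop a non-matching row. A minimal sketch of that pattern, where the column name "price" and the value are purely illustrative:

// Minimal sketch of the BlockSpiller.writeRows contract; "price" is a
// hypothetical column name, not part of the connector above.
spiller.writeRows((Block block, int row) -> {
    // offerValue returns false if the value fails the query's constraints;
    // returning 0 tells the spiller that no row was written.
    boolean matched = block.offerValue("price", row, 42.0D);
    return matched ? 1 : 0;
});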
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class SqlServerMetadataHandler, method doGetSplits.
/**
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, table, and partitions being queried, plus any continuation token.
 * @return A GetSplitsResponse containing the generated splits and, when more remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
{
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    LOGGER.info("partitionContd: {}", partitionContd);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(PARTITION_NUMBER);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder;
        // Include partition information in the split if the table is partitioned
        if (partitionFunction != null && partitioningColumn != null) {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, String.valueOf(locationReader.readText()))
                    .add(PARTITION_FUNCTION, partitionFunction)
                    .add(PARTITIONING_COLUMN, partitioningColumn);
        }
        else {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, String.valueOf(locationReader.readText()));
        }
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have plus a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
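The continuation-token helpers referenced above are not shown. A plausible sketch, assuming the token is just an integer partition index round-tripped through a String (the actual implementations live in the handler class and may differ in detail):

// Hedged sketch of the token helpers used above, not the connector's
// verbatim code; the round-trip is a plain integer partition index.
private int decodeContinuationToken(GetSplitsRequest request)
{
    if (request.hasContinuationToken()) {
        return Integer.parseInt(request.getContinuationToken());
    }
    return 0; // no token means start from the first partition
}

private String encodeContinuationToken(int partition)
{
    return String.valueOf(partition);
}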
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class SynapseMetadataHandler, method doGetSplits.
/**
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks.
 * @param getSplitsRequest Provides details of the catalog, table, and partitions being queried, plus any continuation token.
 * @return A GetSplitsResponse containing the generated splits and, when more remain, a continuation token.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
{
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(PARTITION_NUMBER);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.debug("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder;
        String partInfo = String.valueOf(locationReader.readText());
        // Include partition information in the split if the table is partitioned
        if (partInfo.contains(":::")) {
            String[] partInfoAr = partInfo.split(":::");
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, partInfoAr[0])
                    .add(PARTITION_BOUNDARY_FROM, partInfoAr[1])
                    .add(PARTITION_BOUNDARY_TO, partInfoAr[2])
                    .add(PARTITION_COLUMN, partInfoAr[3]);
        }
        else {
            splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                    .add(PARTITION_NUMBER, partInfo);
        }
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have plus a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
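On the read side, the record handler can recover these properties from the Split by name and turn them into a range predicate. A hypothetical sketch; only the property keys come from the code above, the method, quoting, and clause shape are illustrative:

// Hypothetical consumer of the ":::"-delimited split properties built above.
static String partitionWhereClause(Split split, String quotedColumn)
{
    String from = split.getProperty(PARTITION_BOUNDARY_FROM);
    String to = split.getProperty(PARTITION_BOUNDARY_TO);
    StringBuilder clause = new StringBuilder();
    // A blank boundary (" ") marks an open-ended range, per getPartitions below.
    if (from != null && !from.trim().isEmpty()) {
        clause.append(quotedColumn).append(" > ").append(from);
    }
    if (to != null && !to.trim().isEmpty()) {
        if (clause.length() > 0) {
            clause.append(" AND ");
        }
        clause.append(quotedColumn).append(" <= ").append(to);
    }
    return clause.length() == 0 ? "1=1" : clause.toString();
}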
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class SynapseMetadataHandler, method getPartitions.
/**
 * Partition metadata queries are extracted from template files; we use these queries to check whether the table is partitioned.
 * If the table has no partitions, the data will be fetched with a single split.
 * If the table is partitioned, we fetch the partition info and create as many splits as there are partitions,
 * for parallel processing.
 * @param blockWriter Used to write rows (partitions) into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, schema, and table being queried.
 * @param queryStatusChecker Used to check whether the query is still running, so we can stop work early.
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception
{
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    /**
     * Queries for retrieving Azure Synapse table partitions are built through StringTemplate.
     */
    STGroup stGroup = new STGroupDir("templates", '$', '$');
    ST getPartitionsSt = stGroup.getInstanceOf("getPartitions");
    getPartitionsSt.add("name", getTableLayoutRequest.getTableName().getTableName());
    getPartitionsSt.add("schemaname", getTableLayoutRequest.getTableName().getSchemaName());
    ST rowCountSt = stGroup.getInstanceOf("rowCount");
    rowCountSt.add("name", getTableLayoutRequest.getTableName().getTableName());
    rowCountSt.add("schemaname", getTableLayoutRequest.getTableName().getSchemaName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (Statement st = connection.createStatement();
                Statement st2 = connection.createStatement();
                ResultSet resultSet = st.executeQuery(getPartitionsSt.render());
                ResultSet resultSet2 = st2.executeQuery(rowCountSt.render())) {
            // Check whether the table has partitions using the row-count query
            if (resultSet2.next()) {
                rowCount = resultSet2.getInt("ROW_COUNT");
                LOGGER.info("rowCount: {}", rowCount);
            }
            // Create a single split for a view or non-partitioned table
            if (rowCount == 0) {
                LOGGER.debug("Getting as single Partition: ");
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, ALL_PARTITIONS);
                    // We wrote 1 row, so we return 1
                    return 1;
                });
            }
            else {
                LOGGER.debug("Getting data with diff Partitions: ");
                /*
                 * Synapse supports range partitioning. The partition column and partition range values are
                 * extracted from Synapse metadata tables, and the partition boundaries are formed from those values.
                 * Example: if the partition column is 'col1' and the partition range values are 10, 200, null,
                 * then the following partition boundaries are created to form custom queries for splits:
                 * 1::: :::10:::col1, 2:::10:::200:::col1, 3:::200::: :::col1
                 */
                while (resultSet.next()) {
                    final String partitionNumber = resultSet.getString(PARTITION_NUMBER);
                    LOGGER.debug("partitionNumber: {}", partitionNumber);
                    if ("1".equals(partitionNumber)) {
                        partitionBoundaryFrom = " ";
                        partitionColumn = resultSet.getString(PARTITION_COLUMN);
                        LOGGER.debug("partitionColumn: {}", partitionColumn);
                    }
                    else {
                        partitionBoundaryFrom = partitionBoundaryTo;
                    }
                    partitionBoundaryTo = resultSet.getString("PARTITION_BOUNDARY_VALUE");
                    partitionBoundaryTo = (partitionBoundaryTo == null) ? " " : partitionBoundaryTo;
                    // 1. Returns all partitions of the table; we do not support constraint push-down to filter partitions.
                    // 2. This API is not paginated; we could use ORDER BY and LIMIT clauses with offsets here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        // Create the partition boundary string for this partition
                        block.setValue(PARTITION_NUMBER, rowNum, partitionNumber + ":::" + partitionBoundaryFrom + ":::" + partitionBoundaryTo + ":::" + partitionColumn);
                        // We wrote 1 row, so we return 1
                        return 1;
                    });
                }
            }
        }
    }
    catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
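The boundary strings described in the comment above can be reproduced with a short standalone loop; the column name "col1" and the range values {10, 200, null} are taken directly from the example in that comment:

// Worked example of the boundary construction for range values 10, 200, null.
String[] rangeValues = {"10", "200", null};
String from = " "; // the first (and any open-ended) boundary is a blank
for (int i = 0; i < rangeValues.length; i++) {
    String to = (rangeValues[i] == null) ? " " : rangeValues[i];
    System.out.println((i + 1) + ":::" + from + ":::" + to + ":::col1");
    from = to; // each partition starts where the previous one ended
}
// Prints:
// 1::: :::10:::col1
// 2:::10:::200:::col1
// 3:::200::: :::col1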
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
The class TeradataMetadataHandler, method doGetSplits.
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
{
    LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName());
    int partitionContd = decodeContinuationToken(getSplitsRequest);
    Set<Split> splits = new HashSet<>();
    Block partitions = getSplitsRequest.getPartitions();
    // TODO: consider splitting further depending on row count or data size. Could use a hash key for splitting if there are no partitions.
    for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
        FieldReader locationReader = partitions.getFieldReader(BLOCK_PARTITION_COLUMN_NAME);
        locationReader.setPosition(curPartition);
        SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
        LOGGER.info("{}: Input partition is {}", getSplitsRequest.getQueryId(), locationReader.readText());
        Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
                .add(BLOCK_PARTITION_COLUMN_NAME, String.valueOf(locationReader.readText()));
        splits.add(splitBuilder.build());
        if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
            // We exceeded the number of splits we want to return in a single request; return what we have plus a continuation token.
            return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, encodeContinuationToken(curPartition + 1));
        }
    }
    return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, null);
}
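From the engine's perspective, the continuation token drives a simple fetch loop: keep calling doGetSplits until the response carries no token. A hedged sketch, where requestWithToken and consume are hypothetical stand-ins for the engine's request construction and split handling, and handler/allocator stand for the metadata handler and a BlockAllocator:

// Hypothetical pagination loop; requestWithToken(...) and consume(...) are
// illustrative stand-ins, only the token handling reflects the contract above.
String token = null;
do {
    GetSplitsResponse response = handler.doGetSplits(allocator, requestWithToken(token));
    consume(response.getSplits());
    token = response.getContinuationToken(); // null when all splits have been returned
} while (token != null);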