Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
In class SqlServerMetadataHandler, method getPartitions.
/**
 * Checks whether the input table is a view or not. If it is a view, it will not have any partition info, and
 * data will be fetched with a single split. If it is a table with no partitions, data will also be fetched with
 * a single split. If it is a partitioned table, we fetch the partition info and create splits equal to the
 * number of partitions, for parallel processing.
 * @param blockWriter
 * @param getTableLayoutRequest
 * @param queryStatusChecker
 * @throws Exception
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    List<String> params = Arrays.asList(getTableLayoutRequest.getTableName().getTableName(), getTableLayoutRequest.getTableName().getSchemaName());
    // check whether the input table is a view or not
    String viewFlag = "N";
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
            PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(VIEW_CHECK_QUERY).withParameters(params).build();
            ResultSet resultSet = preparedStatement.executeQuery()) {
        if (resultSet.next()) {
            viewFlag = "VIEW".equalsIgnoreCase(resultSet.getString("TYPE_DESC")) ? "Y" : "N";
        }
        LOGGER.info("viewFlag: {}", viewFlag);
    }
    catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
    List<String> parameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        try (PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build();
                PreparedStatement preparedStatement2 = new PreparedStatementBuilder().withConnection(connection).withQuery(ROW_COUNT_QUERY).withParameters(parameters).build();
                ResultSet resultSet = preparedStatement.executeQuery();
                ResultSet resultSet2 = preparedStatement2.executeQuery()) {
            // check whether the table has partitions or not using ROW_COUNT_QUERY
            if (resultSet2.next()) {
                rowCount = resultSet2.getInt("ROW_COUNT");
                LOGGER.info("rowCount: {}", rowCount);
            }
            // create a single split for a view or a non-partitioned table
            if ("Y".equals(viewFlag) || rowCount == 0) {
                LOGGER.debug("Getting as single Partition: ");
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, ALL_PARTITIONS);
                    // we wrote 1 row so we return 1
                    return 1;
                });
            }
            else {
                LOGGER.debug("Getting data with diff Partitions: ");
                // get partition details from the SQL Server metadata tables
                getPartitionFunction(params);
                // Include the first partition explicitly because it's not returned by GET_PARTITIONS_QUERY
                blockWriter.writeRows((Block block, int rowNum) -> {
                    block.setValue(PARTITION_NUMBER, rowNum, "1");
                    return 1;
                });
                while (resultSet.next()) {
                    final String partitionNumber = resultSet.getString(PARTITION_NUMBER);
                    // 1. Returns all partitions of the table; we do not support constraint push-down to filter partitions.
                    // 2. This API is not paginated; we could use ORDER BY and LIMIT clauses with offsets here.
                    blockWriter.writeRows((Block block, int rowNum) -> {
                        block.setValue(PARTITION_NUMBER, rowNum, partitionNumber);
                        // we wrote 1 row so we return 1
                        return 1;
                    });
                }
            }
        }
    }
    catch (SQLException sqlException) {
        throw new SQLException(sqlException.getErrorCode() + ": " + sqlException.getMessage(), sqlException);
    }
}
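The method above leans on constants and helpers defined elsewhere in SqlServerMetadataHandler (VIEW_CHECK_QUERY, ROW_COUNT_QUERY, GET_PARTITIONS_QUERY, PARTITION_NUMBER, ALL_PARTITIONS, the rowCount field, getPartitionFunction). As a rough, hypothetical sketch of the shape those query constants might take against SQL Server's sys.objects and sys.dm_db_partition_stats catalog views (the actual strings in the connector may differ):

// Hypothetical sketch only -- the real definitions live in SqlServerMetadataHandler.
// Is the named object a view? Parameters: table name, schema name.
static final String VIEW_CHECK_QUERY =
        "SELECT TYPE_DESC FROM sys.objects WHERE name = ? AND schema_id = SCHEMA_ID(?)";
// Row count in partitions beyond the first; 0 implies an unpartitioned table. Parameter: 'schema.table'.
static final String ROW_COUNT_QUERY =
        "SELECT COUNT(*) AS ROW_COUNT FROM sys.dm_db_partition_stats"
        + " WHERE object_id = OBJECT_ID(?) AND partition_number > 1";
// Distinct partition numbers beyond the first. Parameter: 'schema.table'.
static final String GET_PARTITIONS_QUERY =
        "SELECT DISTINCT partition_number AS PARTITION_NUMBER FROM sys.dm_db_partition_stats"
        + " WHERE object_id = OBJECT_ID(?) AND partition_number > 1";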
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
In class TimestreamMetadataHandlerTest, method doGetSplits.
@Test
public void doGetSplits() throws Exception {
    logger.info("doGetSplits - enter");
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, "partition_id", Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "query-id", defaultSchema, new TableName("database1", "table1"), partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
    assertTrue("Continuation criteria violated", response.getSplits().size() == 1);
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
    logger.info("doGetSplits - exit");
}
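For reference, BlockUtils.newBlock is the SDK's varargs factory for a single-column Block; the test uses it to build the one-row partitions input. A small illustrative sketch of the same factory, with three made-up partition ids instead of one:

// Illustrative only: build a three-row partitions Block and print each row back.
Block threePartitions = BlockUtils.newBlock(allocator, "partition_id", Types.MinorType.INT.getType(), 0, 1, 2);
for (int row = 0; row < threePartitions.getRowCount(); row++) {
    logger.info("partition row {}: {}", row, BlockUtils.rowToString(threePartitions, row));
}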
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
In class TimestreamMetadataHandlerTest, method doGetTableLayout.
@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");
    Schema schema = SchemaBuilder.newBuilder().build();
    GetTableLayoutRequest req = new GetTableLayoutRequest(identity, "query-id", defaultSchema, new TableName("database1", "table1"), new Constraints(new HashMap<>()), schema, Collections.EMPTY_SET);
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res);
    Block partitions = res.getPartitions();
    for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
    }
    assertTrue(partitions.getRowCount() == 1);
    logger.info("doGetTableLayout - exit");
}
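In a real planning flow, the partitions Block returned here is what feeds the follow-up splits request. A hedged sketch of that hand-off, reusing the GetSplitsRequest constructor shape from the doGetSplits test above (the empty partition-column list is an assumption for this table):

// Sketch: pass the layout's partitions Block straight into the splits phase.
GetSplitsRequest splitsReq = new GetSplitsRequest(identity, "query-id", defaultSchema,
        new TableName("database1", "table1"), res.getPartitions(), new ArrayList<String>(),
        new Constraints(new HashMap<>()), null);
MetadataResponse splitsRes = handler.doGetSplits(allocator, splitsReq);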
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
In class TPCDSMetadataHandlerTest, method doGetSplits.
@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = BlockUtils.newBlock(allocator, "partitionId", Types.MinorType.INT.getType(), 1);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName("tpcds1", "customer"), partitions, Collections.EMPTY_LIST, new Constraints(new HashMap<>()), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(SPLIT_NUMBER_FIELD));
            assertNotNull(nextSplit.getProperty(SPLIT_TOTAL_NUMBER_FIELD));
            assertNotNull(nextSplit.getProperty(SPLIT_SCALE_FACTOR_FIELD));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations == 0);
    logger.info("doGetSplits: exit");
}
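The do/while above is the standard continuation-token pagination pattern for doGetSplits; at scale factor 1 everything fits in one batch, so numContinuations stays 0. A minimal caller-side sketch that drains every page, using only APIs already exercised in this test:

// Sketch: accumulate all splits across however many continuations occur.
List<Split> allSplits = new ArrayList<>();
String token = null;
do {
    GetSplitsResponse resp = (GetSplitsResponse) handler.doGetSplits(allocator, new GetSplitsRequest(originalReq, token));
    allSplits.addAll(resp.getSplits());
    token = resp.getContinuationToken();
} while (token != null);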
Use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
In class TPCDSRecordHandlerTest, method doReadRecordsSpill.
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_current_cdemo_sk", SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));
    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaForRead,
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "10000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
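The 1_500_000L / 0 pair at the end of the request appears to be the max block size and the max inline block size, with the 0 inline threshold forcing every block to spill to S3. Assuming that reading of the constructor, a hypothetical variant with a generous inline threshold would come back as a plain ReadRecordsResponse instead (the sizes here are illustrative):

// Hypothetical sketch: same split and constraints, but a large inline threshold.
ReadRecordsRequest inlineRequest = new ReadRecordsRequest(identity, "catalog",
        "queryId-" + System.currentTimeMillis(), new TableName("tpcds1", table.getName()),
        schemaForRead, request.getSplit(), new Constraints(constraintsMap),
        100_000_000L, 100_000_000L);
RecordResponse inlineResponse = handler.doReadRecords(allocator, inlineRequest);
assertTrue(inlineResponse instanceof ReadRecordsResponse);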