Example use of com.amazonaws.athena.connector.lambda.data.Block from the project aws-athena-query-federation by awslabs:
class RedisMetadataHandlerTest, method doGetSplitsZset.
/**
 * Verifies that doGetSplits fans a zset-backed table out into one split per key chunk:
 * 3 prefixes x 4 zset keys each x 200 entries per zset should yield 120 splits in a
 * single response (no continuation token), with two SCAN round-trips per prefix.
 */
@Test
public void doGetSplitsZset() {
// 3 prefixes for this table
String prefixes = "prefix1-*,prefix2-*, prefix3-*";
// Mock SCAN: first page returns 3 keys and a non-terminal cursor, second page
// returns 1 key and finishes — i.e. 4 zsets per prefix over 2 scan calls.
when(mockSyncCommands.scan(any(ScanCursor.class), any(ScanArgs.class))).then((InvocationOnMock invocationOnMock) -> {
ScanCursor cursor = (ScanCursor) invocationOnMock.getArguments()[0];
if (cursor == null || cursor.getCursor().equals("0")) {
List<String> result = new ArrayList<>();
result.add(UUID.randomUUID().toString());
result.add(UUID.randomUUID().toString());
result.add(UUID.randomUUID().toString());
MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
scanCursor.setCursor("1");
scanCursor.setKeys(result);
return scanCursor;
} else {
List<String> result = new ArrayList<>();
result.add(UUID.randomUUID().toString());
MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
scanCursor.setCursor("0");
scanCursor.setKeys(result);
scanCursor.setFinished(true);
return scanCursor;
}
});
// 200 entries per zset (ZCOUNT result drives how many splits each zset becomes)
when(mockSyncCommands.zcount(anyString(), any(Range.class))).thenReturn(200L);
List<String> partitionCols = new ArrayList<>();
Schema schema = SchemaBuilder.newBuilder().addField("partitionId", Types.MinorType.INT.getType()).addStringField(REDIS_ENDPOINT_PROP).addStringField(VALUE_TYPE_TABLE_PROP).addStringField(KEY_PREFIX_TABLE_PROP).addStringField(ZSET_KEYS_TABLE_PROP).addStringField(REDIS_SSL_FLAG).addStringField(REDIS_CLUSTER_FLAG).addStringField(REDIS_DB_NUMBER).build();
Block partitions = allocator.createBlock(schema);
partitions.setValue(REDIS_ENDPOINT_PROP, 0, endpoint);
partitions.setValue(VALUE_TYPE_TABLE_PROP, 0, "literal");
// Zset table: no key prefix, zset keys property carries the prefixes instead.
partitions.setValue(KEY_PREFIX_TABLE_PROP, 0, null);
partitions.setValue(ZSET_KEYS_TABLE_PROP, 0, prefixes);
partitions.setValue(REDIS_SSL_FLAG, 0, null);
partitions.setValue(REDIS_CLUSTER_FLAG, 0, null);
partitions.setValue(REDIS_DB_NUMBER, 0, null);
partitions.setRowCount(1);
String continuationToken = null;
GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, partitionCols, new Constraints(new HashMap<>()), null);
GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
logger.info("doGetSplitsZset: req[{}]", req);
MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
GetSplitsResponse response = (GetSplitsResponse) rawResponse;
continuationToken = response.getContinuationToken();
logger.info("doGetSplitsZset: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
assertEquals("Continuation criteria violated", 120, response.getSplits().size());
assertNull("Continuation criteria violated", response.getContinuationToken());
// 3 prefixes x 2 SCAN pages each
verify(mockSyncCommands, times(6)).scan(any(ScanCursor.class), any(ScanArgs.class));
}
Example use of com.amazonaws.athena.connector.lambda.data.Block from the project aws-athena-query-federation by awslabs:
class RedisMetadataHandlerTest, method doGetSplitsPrefix.
/**
 * Verifies that doGetSplits turns a prefix-backed table into exactly one split per
 * key prefix (3 prefixes -> 3 splits) with no continuation token.
 */
@Test
public void doGetSplitsPrefix() {
Schema schema = SchemaBuilder.newBuilder().addField("partitionId", Types.MinorType.INT.getType()).addStringField(REDIS_ENDPOINT_PROP).addStringField(VALUE_TYPE_TABLE_PROP).addStringField(KEY_PREFIX_TABLE_PROP).addStringField(ZSET_KEYS_TABLE_PROP).addStringField(REDIS_SSL_FLAG).addStringField(REDIS_CLUSTER_FLAG).addStringField(REDIS_DB_NUMBER).build();
Block partitions = allocator.createBlock(schema);
partitions.setValue(REDIS_ENDPOINT_PROP, 0, endpoint);
partitions.setValue(VALUE_TYPE_TABLE_PROP, 0, "literal");
// Prefix table: 3 comma-separated prefixes, no zset keys property.
partitions.setValue(KEY_PREFIX_TABLE_PROP, 0, "prefix1-*,prefix2-*, prefix3-*");
partitions.setValue(ZSET_KEYS_TABLE_PROP, 0, null);
partitions.setValue(REDIS_SSL_FLAG, 0, null);
partitions.setValue(REDIS_CLUSTER_FLAG, 0, null);
partitions.setValue(REDIS_DB_NUMBER, 0, null);
partitions.setRowCount(1);
String continuationToken = null;
GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, new ArrayList<>(), new Constraints(new HashMap<>()), null);
GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
logger.info("doGetSplitsPrefix: req[{}]", req);
MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
GetSplitsResponse response = (GetSplitsResponse) rawResponse;
continuationToken = response.getContinuationToken();
logger.info("doGetSplitsPrefix: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
// assertEquals/assertNull report the actual value on failure, unlike assertTrue on a boolean.
assertEquals("Continuation criteria violated", 3, response.getSplits().size());
assertNull("Continuation criteria violated", response.getContinuationToken());
}
Example use of com.amazonaws.athena.connector.lambda.data.Block from the project aws-athena-query-federation by awslabs:
class RedisMetadataHandler, method doGetSplits.
/**
 * If the table is comprised of multiple key prefixes, then we parallelize those by making them each a split.
 *
 * @param blockAllocator allocator used to create any Apache Arrow resources for the response
 * @param request the GetSplits request, whose single partition row carries the Redis
 *        connection and key-layout table properties
 * @return one split per key prefix (prefix tables) or per discovered zset key chunk (zset tables);
 *         never paginated — the continuation token is always null
 * @throws RuntimeException if the partition count is not exactly 1 or a required table property is missing
 * @note This function essentially takes each key-prefix and makes it a split. For zset keys, it breaks each zset
 * into a max of N splits that we have configured to generate as defined by REDIS_MAX_SPLITS.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest request) {
if (request.getPartitions().getRowCount() != 1) {
throw new RuntimeException("Unexpected number of partitions encountered.");
}
Block partitions = request.getPartitions();
// Connection settings travel on the partition block as table properties.
String redisEndpoint = getValue(partitions, 0, REDIS_ENDPOINT_PROP);
String redisValueType = getValue(partitions, 0, VALUE_TYPE_TABLE_PROP);
boolean sslEnabled = Boolean.parseBoolean(getValue(partitions, 0, REDIS_SSL_FLAG));
boolean isCluster = Boolean.parseBoolean(getValue(partitions, 0, REDIS_CLUSTER_FLAG));
String dbNumber = getValue(partitions, 0, REDIS_DB_NUMBER);
if (redisEndpoint == null) {
throw new RuntimeException("Table is missing " + REDIS_ENDPOINT_PROP + " table property");
}
if (redisValueType == null) {
throw new RuntimeException("Table is missing " + VALUE_TYPE_TABLE_PROP + " table property");
}
if (dbNumber == null) {
// default redis logical database
dbNumber = DEFAULT_REDIS_DB_NUMBER;
}
logger.info("doGetSplits: Preparing splits for {}", BlockUtils.rowToString(partitions, 0));
KeyType keyType;
Set<String> splitInputs = new HashSet<>();
RedisConnectionWrapper<String, String> connection = getOrCreateClient(redisEndpoint, sslEnabled, isCluster, dbNumber);
RedisCommandsWrapper<String, String> syncCommands = connection.sync();
String keyPrefix = getValue(partitions, 0, KEY_PREFIX_TABLE_PROP);
if (keyPrefix != null) {
// Add the prefixes to the list and set the key type.
splitInputs.addAll(Arrays.asList(keyPrefix.split(KEY_PREFIX_SEPERATOR)));
keyType = KeyType.PREFIX;
} else {
String prop = getValue(partitions, 0, ZSET_KEYS_TABLE_PROP);
if (prop == null) {
throw new RuntimeException("Table is missing " + ZSET_KEYS_TABLE_PROP + " table property, it must have this or " + KEY_PREFIX_TABLE_PROP);
}
String[] partitionPrefixes = prop.split(KEY_PREFIX_SEPERATOR);
// Add all the values in the ZSETs and keys to scan
for (String next : partitionPrefixes) {
// Start a fresh SCAN per prefix; previously the finished cursor from one
// prefix was reused for the next, relying on cursor "0" meaning "restart".
ScanCursor keyCursor = null;
do {
keyCursor = loadKeys(syncCommands, next, keyCursor, splitInputs);
} while (!keyCursor.isFinished());
}
keyType = KeyType.ZSET;
}
// One split per input (prefix or discovered zset key); zsets may fan out further inside makeSplits.
Set<Split> splits = new HashSet<>();
for (String next : splitInputs) {
splits.addAll(makeSplits(request, syncCommands, redisEndpoint, next, keyType, redisValueType, sslEnabled, isCluster, dbNumber));
}
return new GetSplitsResponse(request.getCatalogName(), splits, null);
}
Example use of com.amazonaws.athena.connector.lambda.data.Block from the project aws-athena-query-federation by awslabs:
class RedisRecordHandler, method loadHashRow.
/**
 * Emits one row for a Redis hash key: the key itself plus one column per requested field,
 * populated from HGETALL. The row is rejected (returns 0 rows) only when a field value
 * fails the constraint check after the key column itself matched.
 */
private void loadHashRow(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList) {
spiller.writeRows((Block block, int row) -> {
// offerValue returns whether the value satisfied the column's constraints.
boolean hashMatched = block.offerValue(KEY_COLUMN_NAME, row, keyString);
Map<String, String> rawValues = new HashMap<>();
// Glue only supports lowercase column names / also could do a better job only fetching the columns
// that are needed
syncCommands.hgetall(keyString).forEach((key, entry) -> rawValues.put(key.toLowerCase(), entry));
for (Field hfield : fieldList) {
// Missing hash fields convert from null; ValueConverter handles the target type.
Object hvalue = ValueConverter.convert(hfield, rawValues.get(hfield.getName()));
if (hashMatched && !block.offerValue(hfield.getName(), row, hvalue)) {
return 0;
}
}
// NOTE(review): if the key column did NOT match, field values are never offered yet the
// row still counts (returns 1) — looks intentional upstream, but confirm before relying on it.
return 1;
});
}
Example use of com.amazonaws.athena.connector.lambda.data.Block from the project aws-athena-query-federation by awslabs:
class RedisRecordHandler, method loadZSetRows.
/**
 * Emits one row per member of a Redis sorted set: the key column plus the single
 * projected value column. The scan pages through the zset with ZSCAN until the
 * server reports the cursor finished.
 */
private void loadZSetRows(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList) {
if (fieldList.size() != 1) {
throw new RuntimeException("Ambiguous field mapping, more than 1 field for ZSET value type.");
}
final Field valueField = fieldList.get(0);
// First page starts from the INITIAL cursor; subsequent pages resume from the last one.
ScoredValueScanCursor<String> page = syncCommands.zscan(keyString, INITIAL);
while (true) {
for (ScoredValue<String> member : page.getValues()) {
spiller.writeRows((Block block, int row) -> {
Object converted = ValueConverter.convert(valueField, member.getValue());
// Row survives only if both the key column and the value column pass constraints.
boolean matched = block.offerValue(KEY_COLUMN_NAME, row, keyString);
matched &= block.offerValue(valueField.getName(), row, converted);
return matched ? 1 : 0;
});
}
if (page.isFinished()) {
break;
}
page = syncCommands.zscan(keyString, page);
}
}
Aggregations