
Example 21 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisMetadataHandlerTest method doGetSplitsZset.

@Test
public void doGetSplitsZset() {
    // 3 prefixes for this table
    String prefixes = "prefix1-*,prefix2-*, prefix3-*";
    // 4 zsets per prefix
    when(mockSyncCommands.scan(any(ScanCursor.class), any(ScanArgs.class))).then((InvocationOnMock invocationOnMock) -> {
        ScanCursor cursor = (ScanCursor) invocationOnMock.getArguments()[0];
        if (cursor == null || cursor.getCursor().equals("0")) {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("1");
            scanCursor.setKeys(result);
            return scanCursor;
        } else {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("0");
            scanCursor.setKeys(result);
            scanCursor.setFinished(true);
            return scanCursor;
        }
    });
    // 200 keys per zset
    when(mockSyncCommands.zcount(anyString(), any(Range.class))).thenReturn(200L);
    List<String> partitionCols = new ArrayList<>();
    Schema schema = SchemaBuilder.newBuilder()
            .addField("partitionId", Types.MinorType.INT.getType())
            .addStringField(REDIS_ENDPOINT_PROP)
            .addStringField(VALUE_TYPE_TABLE_PROP)
            .addStringField(KEY_PREFIX_TABLE_PROP)
            .addStringField(ZSET_KEYS_TABLE_PROP)
            .addStringField(REDIS_SSL_FLAG)
            .addStringField(REDIS_CLUSTER_FLAG)
            .addStringField(REDIS_DB_NUMBER)
            .build();
    Block partitions = allocator.createBlock(schema);
    partitions.setValue(REDIS_ENDPOINT_PROP, 0, endpoint);
    partitions.setValue(VALUE_TYPE_TABLE_PROP, 0, "literal");
    partitions.setValue(KEY_PREFIX_TABLE_PROP, 0, null);
    partitions.setValue(ZSET_KEYS_TABLE_PROP, 0, prefixes);
    partitions.setValue(REDIS_SSL_FLAG, 0, null);
    partitions.setValue(REDIS_CLUSTER_FLAG, 0, null);
    partitions.setValue(REDIS_DB_NUMBER, 0, null);
    partitions.setRowCount(1);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplitsPrefix: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplitsPrefix: continuationToken[{}] - numSplits[{}]", new Object[] { continuationToken, response.getSplits().size() });
    assertEquals("Continuation criteria violated", 120, response.getSplits().size());
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
    verify(mockSyncCommands, times(6)).scan(any(ScanCursor.class), any(ScanArgs.class));
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) ScanArgs(io.lettuce.core.ScanArgs) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) Range(io.lettuce.core.Range) ScanCursor(io.lettuce.core.ScanCursor) MockKeyScanCursor(com.amazonaws.athena.connectors.redis.util.MockKeyScanCursor) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)
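The MockKeyScanCursor used above comes from the connector's test utilities and is not shown in this listing. A minimal sketch of what such a helper could look like, inferred from how the test drives it (setKeys is an assumption; setCursor and setFinished are public setters that lettuce's ScanCursor already provides):

import io.lettuce.core.KeyScanCursor;

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of a test-only cursor whose pages the mocked scan(...) can control.
// The project's real MockKeyScanCursor may be implemented differently.
public class MockKeyScanCursor<K> extends KeyScanCursor<K>
{
    private List<K> keys = new ArrayList<>();

    // Let the test set the keys returned for this "page" of the scan.
    public void setKeys(List<K> keys)
    {
        this.keys = keys;
    }

    @Override
    public List<K> getKeys()
    {
        return keys;
    }
    // setCursor(String) and setFinished(boolean) are inherited from ScanCursor.
}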

Example 22 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisMetadataHandlerTest method doGetSplitsPrefix.

@Test
public void doGetSplitsPrefix() {
    Schema schema = SchemaBuilder.newBuilder()
            .addField("partitionId", Types.MinorType.INT.getType())
            .addStringField(REDIS_ENDPOINT_PROP)
            .addStringField(VALUE_TYPE_TABLE_PROP)
            .addStringField(KEY_PREFIX_TABLE_PROP)
            .addStringField(ZSET_KEYS_TABLE_PROP)
            .addStringField(REDIS_SSL_FLAG)
            .addStringField(REDIS_CLUSTER_FLAG)
            .addStringField(REDIS_DB_NUMBER)
            .build();
    Block partitions = allocator.createBlock(schema);
    partitions.setValue(REDIS_ENDPOINT_PROP, 0, endpoint);
    partitions.setValue(VALUE_TYPE_TABLE_PROP, 0, "literal");
    partitions.setValue(KEY_PREFIX_TABLE_PROP, 0, "prefix1-*,prefix2-*, prefix3-*");
    partitions.setValue(ZSET_KEYS_TABLE_PROP, 0, null);
    partitions.setValue(REDIS_SSL_FLAG, 0, null);
    partitions.setValue(REDIS_CLUSTER_FLAG, 0, null);
    partitions.setValue(REDIS_DB_NUMBER, 0, null);
    partitions.setRowCount(1);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, new ArrayList<>(), new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplitsPrefix: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplitsPrefix: continuationToken[{}] - numSplits[{}]", new Object[] { continuationToken, response.getSplits().size() });
    assertTrue("Continuation criteria violated", response.getSplits().size() == 3);
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
}
Also used : Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)
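The asserted split count follows directly from the key-prefix fan-out: each comma-separated prefix in KEY_PREFIX_TABLE_PROP becomes one split. A tiny standalone illustration, assuming the separator constant KEY_PREFIX_SEPERATOR is a plain comma:

// Standalone illustration, not connector code: three comma-separated prefixes -> three splits.
public class PrefixFanOutDemo
{
    public static void main(String[] args)
    {
        String keyPrefix = "prefix1-*,prefix2-*, prefix3-*";
        String[] prefixes = keyPrefix.split(",");   // assumes KEY_PREFIX_SEPERATOR is ","
        System.out.println(prefixes.length);        // prints 3, matching the asserted split count
    }
}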

Example 23 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisMetadataHandler method doGetSplits.

/**
 * If the table is composed of multiple key prefixes, we parallelize them by making each prefix its own split.
 *
 * @note This function essentially turns each key prefix into a split. For zset keys, it breaks each zset
 * into at most N splits, where N is configured via REDIS_MAX_SPLITS.
 */
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest request) {
    if (request.getPartitions().getRowCount() != 1) {
        throw new RuntimeException("Unexpected number of partitions encountered.");
    }
    Block partitions = request.getPartitions();
    String redisEndpoint = getValue(partitions, 0, REDIS_ENDPOINT_PROP);
    String redisValueType = getValue(partitions, 0, VALUE_TYPE_TABLE_PROP);
    boolean sslEnabled = Boolean.parseBoolean(getValue(partitions, 0, REDIS_SSL_FLAG));
    boolean isCluster = Boolean.parseBoolean(getValue(partitions, 0, REDIS_CLUSTER_FLAG));
    String dbNumber = getValue(partitions, 0, REDIS_DB_NUMBER);
    if (redisEndpoint == null) {
        throw new RuntimeException("Table is missing " + REDIS_ENDPOINT_PROP + " table property");
    }
    if (redisValueType == null) {
        throw new RuntimeException("Table is missing " + VALUE_TYPE_TABLE_PROP + " table property");
    }
    if (dbNumber == null) {
        // default redis logical database
        dbNumber = DEFAULT_REDIS_DB_NUMBER;
    }
    logger.info("doGetSplits: Preparing splits for {}", BlockUtils.rowToString(partitions, 0));
    KeyType keyType;
    Set<String> splitInputs = new HashSet<>();
    RedisConnectionWrapper<String, String> connection = getOrCreateClient(redisEndpoint, sslEnabled, isCluster, dbNumber);
    RedisCommandsWrapper<String, String> syncCommands = connection.sync();
    String keyPrefix = getValue(partitions, 0, KEY_PREFIX_TABLE_PROP);
    if (keyPrefix != null) {
        // Add the prefixes to the list and set the key type.
        splitInputs.addAll(Arrays.asList(keyPrefix.split(KEY_PREFIX_SEPERATOR)));
        keyType = KeyType.PREFIX;
    } else {
        String prop = getValue(partitions, 0, ZSET_KEYS_TABLE_PROP);
        if (prop == null) {
            throw new RuntimeException("Table is missing " + ZSET_KEYS_TABLE_PROP + " table property, it must have this or " + KEY_PREFIX_TABLE_PROP);
        }
        String[] partitionPrefixes = prop.split(KEY_PREFIX_SEPERATOR);
        ScanCursor keyCursor = null;
        // Add all the values in the ZSETs as keys to scan
        for (String next : partitionPrefixes) {
            do {
                keyCursor = loadKeys(syncCommands, next, keyCursor, splitInputs);
            } while (!keyCursor.isFinished());
        }
        keyType = KeyType.ZSET;
    }
    Set<Split> splits = new HashSet<>();
    for (String next : splitInputs) {
        splits.addAll(makeSplits(request, syncCommands, redisEndpoint, next, keyType, redisValueType, sslEnabled, isCluster, dbNumber));
    }
    return new GetSplitsResponse(request.getCatalogName(), splits, null);
}
Also used : ScanCursor(io.lettuce.core.ScanCursor) KeyScanCursor(io.lettuce.core.KeyScanCursor) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) HashSet(java.util.HashSet)
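The loadKeys(...) helper invoked in the zset branch is not part of this listing. A minimal sketch of what it could look like, assuming it simply pages a Redis SCAN that matches the zset key pattern and collects the returned keys into splitInputs (the ScanArgs usage and method shape are assumptions, not the connector's actual code). In the doGetSplitsZset test above, this paging is what turns 3 prefixes into 12 zset keys (two SCAN pages of 3 + 1 keys per prefix), which the handler then fans out into the asserted 120 splits.

// Hypothetical sketch of loadKeys(...): page through SCAN results matching the pattern
// and register every returned key as a split input. The real RedisMetadataHandler method
// may pass additional arguments (for example a COUNT hint).
private ScanCursor loadKeys(RedisCommandsWrapper<String, String> syncCommands,
        String keyPattern,
        ScanCursor cursor,
        Set<String> splitInputs)
{
    KeyScanCursor<String> newCursor = syncCommands.scan(
            cursor == null ? ScanCursor.INITIAL : cursor,
            ScanArgs.Builder.matches(keyPattern));
    splitInputs.addAll(newCursor.getKeys());
    return newCursor;
}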

Example 24 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisRecordHandler method loadHashRow.

private void loadHashRow(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList) {
    spiller.writeRows((Block block, int row) -> {
        boolean hashMatched = block.offerValue(KEY_COLUMN_NAME, row, keyString);
        Map<String, String> rawValues = new HashMap<>();
        // Glue only supports lowercase column names; we could also do a better job of fetching
        // only the columns that are needed.
        syncCommands.hgetall(keyString).forEach((key, entry) -> rawValues.put(key.toLowerCase(), entry));
        for (Field hfield : fieldList) {
            Object hvalue = ValueConverter.convert(hfield, rawValues.get(hfield.getName()));
            if (hashMatched && !block.offerValue(hfield.getName(), row, hvalue)) {
                return 0;
            }
        }
        return 1;
    });
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) HashMap(java.util.HashMap) Block(com.amazonaws.athena.connector.lambda.data.Block)
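ValueConverter.convert(...) is what turns each raw Redis string into a value matching the Arrow field before it is offered to the block. Its implementation is not shown here; a rough sketch of the idea follows (the types handled and the method shape are assumptions, and the connector's real converter covers more cases):

import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.pojo.Field;

// Hypothetical sketch: coerce a raw Redis string to a Java value matching the Arrow
// minor type of the target field. Not the connector's actual ValueConverter.
public final class ValueConverterSketch
{
    private ValueConverterSketch() {}

    public static Object convert(Field field, String value)
    {
        if (value == null) {
            return null;
        }
        switch (Types.getMinorTypeForArrowType(field.getType())) {
            case INT:
                return Integer.valueOf(value);
            case BIGINT:
                return Long.valueOf(value);
            case FLOAT8:
                return Double.valueOf(value);
            case BIT:
                return Boolean.valueOf(value);
            case VARCHAR:
            default:
                return value;
        }
    }
}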

Example 25 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class RedisRecordHandler method loadZSetRows.

private void loadZSetRows(RedisCommandsWrapper<String, String> syncCommands, String keyString, BlockSpiller spiller, List<Field> fieldList) {
    if (fieldList.size() != 1) {
        throw new RuntimeException("Ambiguous field mapping, more than 1 field for ZSET value type.");
    }
    Field zfield = fieldList.get(0);
    ScoredValueScanCursor<String> cursor = null;
    do {
        cursor = syncCommands.zscan(keyString, cursor == null ? INITIAL : cursor);
        for (ScoredValue<String> nextElement : cursor.getValues()) {
            spiller.writeRows((Block block, int rowNum) -> {
                Object zvalue = ValueConverter.convert(zfield, nextElement.getValue());
                boolean zsetMatched = block.offerValue(KEY_COLUMN_NAME, rowNum, keyString);
                zsetMatched &= block.offerValue(zfield.getName(), rowNum, zvalue);
                return zsetMatched ? 1 : 0;
            });
        }
    } while (!cursor.isFinished());
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Block(com.amazonaws.athena.connector.lambda.data.Block)
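loadHashRow and loadZSetRows are per-key loaders; the record handler chooses one based on the value type recorded in the split. A hypothetical dispatch sketch (the valueType variable and the loadLiteralRow helper are assumptions for illustration, not the connector's actual readWithConstraint code):

// Hypothetical dispatch sketch: pick a row loader based on the split's value-type property.
private void loadRow(String valueType, RedisCommandsWrapper<String, String> syncCommands,
        String keyString, BlockSpiller spiller, List<Field> fieldList)
{
    switch (valueType) {
        case "hash":
            loadHashRow(syncCommands, keyString, spiller, fieldList);
            break;
        case "zset":
            loadZSetRows(syncCommands, keyString, spiller, fieldList);
            break;
        case "literal":
            loadLiteralRow(syncCommands, keyString, spiller, fieldList);  // assumed third loader
            break;
        default:
            throw new RuntimeException("Unsupported value type: " + valueType);
    }
}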

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block) 113
Test (org.junit.Test) 39
HashMap (java.util.HashMap) 35
Schema (org.apache.arrow.vector.types.pojo.Schema) 35
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 32
Split (com.amazonaws.athena.connector.lambda.domain.Split) 31
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) 28
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader) 28
TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 27
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) 23
HashSet (java.util.HashSet) 23
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 20
Field (org.apache.arrow.vector.types.pojo.Field) 17
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) 13
PreparedStatement (java.sql.PreparedStatement) 13
ResultSet (java.sql.ResultSet) 13
ArrayList (java.util.ArrayList) 13
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) 12
Connection (java.sql.Connection) 12
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) 11