Example 21 with ReadRecordsResponse

Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In the class RedisRecordHandlerTest, the method doReadRecordsHash.

@Test
public void doReadRecordsHash() throws Exception {
    // 7 keys in total for the key prefix: the first scan page returns 5, the second returns 2
    when(mockSyncCommands.scan(any(ScanCursor.class), any(ScanArgs.class))).then((InvocationOnMock invocationOnMock) -> {
        ScanCursor cursor = (ScanCursor) invocationOnMock.getArguments()[0];
        if (cursor == null || cursor.getCursor().equals("0")) {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("1");
            scanCursor.setKeys(result);
            return scanCursor;
        } else {
            List<String> result = new ArrayList<>();
            result.add(UUID.randomUUID().toString());
            result.add(UUID.randomUUID().toString());
            MockKeyScanCursor<String> scanCursor = new MockKeyScanCursor<>();
            scanCursor.setCursor("0");
            scanCursor.setKeys(result);
            scanCursor.setFinished(true);
            return scanCursor;
        }
    });
    // 3 hash fields per key: intcol, stringcol, and extracol (extracol is not part of the read schema)
    AtomicLong intColVal = new AtomicLong(0);
    when(mockSyncCommands.hgetall(anyString())).then((InvocationOnMock invocationOnMock) -> {
        Map<String, String> result = new HashMap<>();
        result.put("intcol", String.valueOf(intColVal.getAndIncrement()));
        result.put("stringcol", UUID.randomUUID().toString());
        result.put("extracol", UUID.randomUUID().toString());
        return result;
    });
    AtomicLong value = new AtomicLong(0);
    when(mockSyncCommands.get(anyString())).thenAnswer((InvocationOnMock invocationOnMock) -> String.valueOf(value.getAndIncrement()));
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(splitLoc, keyFactory.create())
            .add(REDIS_ENDPOINT_PROP, endpoint)
            .add(KEY_TYPE, KeyType.PREFIX.getId())
            .add(KEY_PREFIX_TABLE_PROP, "key-*")
            .add(VALUE_TYPE_TABLE_PROP, ValueType.HASH.getId())
            .build();
    Schema schemaForRead = SchemaBuilder.newBuilder()
            .addField("_key_", Types.MinorType.VARCHAR.getType())
            .addField("intcol", Types.MinorType.INT.getType())
            .addField("stringcol", Types.MinorType.VARCHAR.getType())
            .build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("intcol", SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 1)), false));
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            schemaForRead,
            split,
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsHash: rows[{}]", response.getRecordCount());
    logger.info("doReadRecordsHash: {}", BlockUtils.rowToString(response.getRecords(), 0));
    assertTrue(response.getRecords().getRowCount() == 5);
    assertTrue(response.getRecords().getFields().size() == schemaForRead.getFields().size());
    FieldReader keyReader = response.getRecords().getFieldReader(KEY_COLUMN_NAME);
    keyReader.setPosition(0);
    assertNotNull(keyReader.readText());
    FieldReader intCol = response.getRecords().getFieldReader("intcol");
    intCol.setPosition(0);
    assertNotNull(intCol.readInteger());
    FieldReader stringCol = response.getRecords().getFieldReader("stringcol");
    stringCol.setPosition(0);
    assertNotNull(stringCol.readText());
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) ScanArgs(io.lettuce.core.ScanArgs) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ScanCursor(io.lettuce.core.ScanCursor) MockScoredValueScanCursor(com.amazonaws.athena.connectors.redis.util.MockScoredValueScanCursor) MockKeyScanCursor(com.amazonaws.athena.connectors.redis.util.MockKeyScanCursor) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) MockKeyScanCursor(com.amazonaws.athena.connectors.redis.util.MockKeyScanCursor) Test(org.junit.Test)
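
Since the test only spot-checks row position 0, a natural follow-up is to walk every row of the returned Block and confirm the pushed-down predicate actually held. The helper below is a minimal sketch, not part of the original test; it reuses only calls already shown above (getRowCount, getFieldReader, setPosition, readInteger). With the mocks above, the 7 scanned keys carry intcol values 0 through 6, so the intcol > 1 constraint leaves exactly the 5 rows the test asserts.

private void assertIntColConstraintHeld(Block records) {
    // Check every row, not just position 0, for the pushed-down predicate intcol > 1.
    FieldReader intColReader = records.getFieldReader("intcol");
    for (int row = 0; row < records.getRowCount(); row++) {
        intColReader.setPosition(row);
        assertTrue(intColReader.readInteger() > 1);
    }
}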

Example 22 with ReadRecordsResponse

Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In the class TimestreamRecordHandlerTest, the method readRecordsView.

@Test
public void readRecordsView() throws Exception {
    logger.info("readRecordsView - enter");
    Schema schemaForReadView = SchemaBuilder.newBuilder()
            .addField("measure_name", Types.MinorType.VARCHAR.getType())
            .addField("az", Types.MinorType.VARCHAR.getType())
            .addField("value", Types.MinorType.FLOAT8.getType())
            .addField("num_samples", Types.MinorType.BIGINT.getType())
            .addMetadata(VIEW_METADATA_FIELD, "select measure_name, az,sum(\"measure_value::double\") as value, count(*) as num_samples from \"" + DEFAULT_SCHEMA + "\".\"" + TEST_TABLE + "\" group by measure_name, az")
            .build();
    String expectedQuery = "WITH t1 AS ( select measure_name, az,sum(\"measure_value::double\") as value, count(*) as num_samples from \"my_schema\".\"my_table\" group by measure_name, az )  SELECT measure_name, az, value, num_samples FROM t1 WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";
    QueryResult mockResult = makeMockQueryResult(schemaForReadView, 1_000);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(splitLoc, null).build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true)
            .add("us-east-1a")
            .add("us-east-1b")
            .build());
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            "default",
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_VIEW),
            schemaForReadView,
            split,
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("readRecordsView: rows[{}]", response.getRecordCount());
    for (int i = 0; i < response.getRecordCount() && i < 10; i++) {
        logger.info("readRecordsView: {}", BlockUtils.rowToString(response.getRecords(), i));
    }
    logger.info("readRecordsView - exit");
}
Also used : QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Schema(org.apache.arrow.vector.types.pojo.Schema) Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ByteArrayInputStream(java.io.ByteArrayInputStream) After(org.junit.After) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AmazonTimestreamQuery(com.amazonaws.services.timestreamquery.AmazonTimestreamQuery) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) UUID(java.util.UUID) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Matchers.any(org.mockito.Matchers.any) List(java.util.List) ByteStreams(com.google.common.io.ByteStreams) BlockUtils(com.amazonaws.athena.connector.lambda.data.BlockUtils) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) EncryptionKeyFactory(com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory) Mockito.mock(org.mockito.Mockito.mock) Mock(org.mockito.Mock) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3Object(com.amazonaws.services.s3.model.S3Object) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) Matchers.anyObject(org.mockito.Matchers.anyObject) AmazonS3(com.amazonaws.services.s3.AmazonS3) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) S3BlockSpillReader(com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader) Before(org.junit.Before) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Assert.assertNotNull(org.junit.Assert.assertNotNull) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Assert.assertTrue(org.junit.Assert.assertTrue) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Rule(org.junit.Rule) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) VIEW_METADATA_FIELD(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.VIEW_METADATA_FIELD) Collections(java.util.Collections) 
Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream)
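
The expectedQuery string above shows the pattern under test for views: the SQL stored in VIEW_METADATA_FIELD is wrapped in a WITH clause and the constraint on az is appended as a WHERE predicate. The sketch below only illustrates that wrapping pattern; it is not the connector's actual query builder, and viewSql, columns, and predicate are placeholders supplied by the caller.

static String wrapViewQuery(String viewSql, List<String> columns, String predicate) {
    // Mirrors the shape of expectedQuery: WITH t1 AS ( <view sql> )  SELECT <cols> FROM t1 [WHERE (<predicate>)]
    StringBuilder sql = new StringBuilder()
            .append("WITH t1 AS ( ").append(viewSql).append(" )  ")
            .append("SELECT ").append(String.join(", ", columns)).append(" FROM t1");
    if (predicate != null && !predicate.isEmpty()) {
        sql.append(" WHERE (").append(predicate).append(")");
    }
    return sql.toString();
}

Called with the view SQL embedded in schemaForReadView, the four projected column names, and the predicate "az" IN ('us-east-1a','us-east-1b'), this reproduces expectedQuery, including the double space after the closing parenthesis.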

Example 23 with ReadRecordsResponse

Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In the class TPCDSRecordHandlerTest, the method doReadRecordForTPCDSTIMETypeColumn.

@Test
public void doReadRecordForTPCDSTIMETypeColumn() throws Exception {
    for (Table next : Table.getBaseTables()) {
        if (next.getName().equals("dbgen_version")) {
            table = next;
        }
    }
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    for (Column nextCol : table.getColumns()) {
        schemaBuilder.addField(TPCDSUtils.convertColumn(nextCol));
    }
    ReadRecordsRequest request = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            new TableName("tpcds1", table.getName()),
            schemaBuilder.build(),
            Split.newBuilder(S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(), keyFactory.create())
                    .add(SPLIT_NUMBER_FIELD, "0")
                    .add(SPLIT_TOTAL_NUMBER_FIELD, "1000")
                    .add(SPLIT_SCALE_FACTOR_FIELD, "1")
                    .build(),
            new Constraints(ImmutableMap.of()),
            100_000_000_000L,
            100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordForTPCDSTIMETypeColumn: {}", BlockUtils.rowToString(response.getRecords(), 0));
    // TPCDS for `dbgen_version` always generates 1 record.
    assertEquals(1, response.getRecords().getRowCount());
    logger.info("doReadRecordForTPCDSTIMETypeColumn: exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Table(com.teradata.tpcds.Table) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Column(com.teradata.tpcds.column.Column) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Test(org.junit.Test)
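
The Split built in the request carries three properties (SPLIT_NUMBER_FIELD, SPLIT_TOTAL_NUMBER_FIELD, SPLIT_SCALE_FACTOR_FIELD) that tell the TPCDS handler which slice of generated data the split covers. The fragment below is a hedged sketch of reading them back inside a handler, assuming the SDK's Split.getProperty accessor; the method name and return shape are illustrative only.

static int[] readTpcdsChunkSettings(Split split) {
    int splitNumber = Integer.parseInt(split.getProperty(SPLIT_NUMBER_FIELD));        // "0" in this test
    int totalSplits = Integer.parseInt(split.getProperty(SPLIT_TOTAL_NUMBER_FIELD));  // "1000"
    int scaleFactor = Integer.parseInt(split.getProperty(SPLIT_SCALE_FACTOR_FIELD));  // "1"
    return new int[] { splitNumber, totalSplits, scaleFactor };
}

Because dbgen_version always generates exactly one row, the single-row assertion above holds for the chunk-0 split built in the request.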

Example 24 with ReadRecordsResponse

Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In the class ExampleRecordHandlerTest, the method doReadRecordsNoSpill.

@Test
public void doReadRecordsNoSpill() throws Exception {
    if (!enableTests) {
        // We do this because until you complete the tutorial these tests will fail. When you attempt to publish
        // using ../tools/publish.sh ...  it will set the publishing flag and force these tests. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("doReadRecordsNoSpill: Tests are disabled, to enable them set the 'publishing' environment variable " + "using maven clean install -Dpublishing=true");
        return;
    }
    for (int i = 0; i < 2; i++) {
        Map<String, ValueSet> constraintsMap = new HashMap<>();
        ReadRecordsRequest request = new ReadRecordsRequest(fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), null)
                        .add("year", "2017")
                        .add("month", "11")
                        .add("day", "1")
                        .build(),
                new Constraints(constraintsMap),
                // 100GB don't expect this to spill
                100_000_000_000L,
                100_000_000_000L);
        RecordResponse rawResponse = handler.doReadRecords(allocator, request);
        assertTrue(rawResponse instanceof ReadRecordsResponse);
        ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
        logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
        assertTrue(response.getRecords().getRowCount() > 0);
        logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
    }
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)
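
makeSpillLocation() is a helper in the test class that is not reproduced on this page. A minimal sketch, assuming it builds a random S3SpillLocation the same way the Redis and Timestream tests above do; the bucket, split, and query IDs only need to be unique for the test run.

private SpillLocation makeSpillLocation() {
    // Random S3 coordinates; the no-spill test never actually writes here.
    return S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
}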

Example 25 with ReadRecordsResponse

Use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In the class LambdaRecordProvider, the method readRecords.

/**
 * This method builds and executes a ReadRecordsRequest against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table for which splits should be retrieved
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param split the split to be read in this request
 * @param recordFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 */
public static ReadRecordsResponse readRecords(String catalog,
        TableName tableName,
        Constraints constraints,
        Schema schema,
        Split split,
        String recordFunction,
        FederatedIdentity identity) {
    String queryId = generateQueryId();
    log.info("Submitting ReadRecordsRequest with ID " + queryId);
    try (ReadRecordsRequest request = new ReadRecordsRequest(identity, catalog, queryId, tableName, schema,
            split, constraints, MAX_BLOCK_SIZE, MAX_INLINE_BLOCK_SIZE)) {
        log.info("Submitting request: {}", request);
        ReadRecordsResponse response = (ReadRecordsResponse) getService(recordFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)
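
A hedged usage sketch for the helper above, for example from a connector validation tool. The tableName, schema, split, and identity values are assumed to have been obtained from the connector's metadata calls beforehand, and "my-record-function" is a placeholder for the deployed Lambda's name.

ReadRecordsResponse response = LambdaRecordProvider.readRecords(
        "lambda",                          // catalog name passed through to the connector
        tableName,                         // schema-qualified table from the metadata calls
        new Constraints(new HashMap<>()),  // no predicates pushed down in this sketch
        schema,                            // table schema from GetTable
        split,                             // one split from GetSplits
        "my-record-function",              // placeholder Lambda function name
        identity);
log.info("Read {} rows; first row: {}", response.getRecordCount(),
        BlockUtils.rowToString(response.getRecords(), 0));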

Aggregations

ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) 33
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) 27
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 26
RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse) 26
Test (org.junit.Test) 25
Split (com.amazonaws.athena.connector.lambda.domain.Split) 17
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 16
HashMap (java.util.HashMap) 16
TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 15
RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) 15
ArrayList (java.util.ArrayList) 15
Matchers.anyString (org.mockito.Matchers.anyString) 15
S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) 12
Schema (org.apache.arrow.vector.types.pojo.Schema) 11
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 11
Block (com.amazonaws.athena.connector.lambda.data.Block) 7
BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) 6
GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) 6
InputStream (java.io.InputStream) 6
GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) 5