Search in sources :

Example 16 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class RedshiftRecordHandlerTest method getSingleValueSet.

private ValueSet getSingleValueSet(Object value) {
    Range range = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range.isSingleValue()).thenReturn(true);
    Mockito.when(range.getLow().getValue()).thenReturn(value);
    ValueSet valueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet.getRanges().getOrderedRanges()).thenReturn(Collections.singletonList(range));
    return valueSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 17 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class SynapseRecordHandlerTest method getSingleValueSet.

private ValueSet getSingleValueSet(Object value) {
    Range range = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range.isSingleValue()).thenReturn(true);
    Mockito.when(range.getLow().getValue()).thenReturn(value);
    ValueSet valueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet.getRanges().getOrderedRanges()).thenReturn(Collections.singletonList(range));
    return valueSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 18 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_current_cdemo_sk", SortedRangeSet.of(Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));
    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog", "queryId-" + System.currentTimeMillis(), new TableName("tpcds1", table.getName()), schemaForRead, Split.newBuilder(S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build(), keyFactory.create()).add(SPLIT_NUMBER_FIELD, "0").add(SPLIT_TOTAL_NUMBER_FIELD, "10000").add(SPLIT_SCALE_FACTOR_FIELD, "1").build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
    1_500_000L, 0);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 19 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandlerTest method doReadRecordsNoSpill.

@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("c_customer_id", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add("AAAAAAAABAAAAAAA").add("AAAAAAAACAAAAAAA").add("AAAAAAAADAAAAAAA").build());
    ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog", "queryId-" + System.currentTimeMillis(), new TableName("tpcds1", table.getName()), schemaForRead, Split.newBuilder(S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build(), keyFactory.create()).add(SPLIT_NUMBER_FIELD, "0").add(SPLIT_TOTAL_NUMBER_FIELD, "1000").add(SPLIT_SCALE_FACTOR_FIELD, "1").build(), new Constraints(constraintsMap), 100_000_000_000L, // 100GB don't expect this to spill
    100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertTrue(response.getRecords().getRowCount() == 3);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
    logger.info("doReadRecordsNoSpill: exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 20 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class TimestreamRecordHandlerTest method doReadRecordsNoSpill.

@Test
public void doReadRecordsNoSpill() throws Exception {
    int numRowsGenerated = 1_000;
    String expectedQuery = "SELECT measure_name, measure_value::double, az, time, hostname, region FROM \"my_schema\".\"my_table\" WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";
    QueryResult mockResult = makeMockQueryResult(schemaForRead, numRowsGenerated);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true).add("us-east-1a").add("us-east-1b").build());
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create());
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
    100_000_000_000L, 100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertTrue(response.getRecords().getRowCount() > 0);
    // ensure we actually filtered something out
    assertTrue(response.getRecords().getRowCount() < numRowsGenerated);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Schema(org.apache.arrow.vector.types.pojo.Schema) Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ByteArrayInputStream(java.io.ByteArrayInputStream) After(org.junit.After) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AmazonTimestreamQuery(com.amazonaws.services.timestreamquery.AmazonTimestreamQuery) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) UUID(java.util.UUID) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Matchers.any(org.mockito.Matchers.any) List(java.util.List) ByteStreams(com.google.common.io.ByteStreams) BlockUtils(com.amazonaws.athena.connector.lambda.data.BlockUtils) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) EncryptionKeyFactory(com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory) Mockito.mock(org.mockito.Mockito.mock) Mock(org.mockito.Mock) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3Object(com.amazonaws.services.s3.model.S3Object) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) Matchers.anyObject(org.mockito.Matchers.anyObject) AmazonS3(com.amazonaws.services.s3.AmazonS3) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) S3BlockSpillReader(com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader) Before(org.junit.Before) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Assert.assertNotNull(org.junit.Assert.assertNotNull) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Assert.assertTrue(org.junit.Assert.assertTrue) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Rule(org.junit.Rule) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) VIEW_METADATA_FIELD(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.VIEW_METADATA_FIELD) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)104 Test (org.junit.Test)66 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)63 HashMap (java.util.HashMap)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)47 Schema (org.apache.arrow.vector.types.pojo.Schema)37 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)27 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)26 ArrayList (java.util.ArrayList)25 Matchers.anyString (org.mockito.Matchers.anyString)25 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)24 Block (com.amazonaws.athena.connector.lambda.data.Block)23 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)21 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)18 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)17 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)17 InvocationOnMock (org.mockito.invocation.InvocationOnMock)17 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)13