Search in sources:

Example 11 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In class DocDBRecordHandlerTest, method nestedStructTest:

/**
 * Reads a single document made of nested structs and lists of structs and checks that the
 * resulting row renders to the expected nested string form.
 *
 * <p>The table schema is obtained from {@code mdHandler.doGetTable} against the same stubbed
 * documents before the read is issued.
 */
@Test
public void nestedStructTest() throws Exception {
    List<Document> documents = new ArrayList<>();
    Document result = new Document();
    documents.add(result);

    // Element of "SomeList": a struct holding a string and a nested list of structs.
    Document listStruct1 = new Document();
    listStruct1.put("SomeSubStruct", "someSubStruct1");
    List<Document> subList = new ArrayList<>();
    Document listSubStruct1 = new Document();
    listSubStruct1.put("SomeSubSubStruct", "someSubSubStruct");
    subList.add(listSubStruct1);
    listStruct1.put("SomeSubList", subList);

    // The same struct is added twice; the expected string below contains two identical entries.
    List<Document> list = new ArrayList<>();
    list.add(listStruct1);
    list.add(listStruct1);

    Document structWithList = new Document();
    structWithList.put("SomeList", list);
    Document structWithNullList = new Document();
    structWithNullList.put("SomeNullList", null);
    Document simpleSubStruct = new Document();
    simpleSubStruct.put("SomeSimpleSubStruct", "someSimpleSubStruct");
    structWithList.put("SimpleSubStruct", simpleSubStruct);
    structWithList.put("SimpleSubStructNullList", structWithNullList);
    result.put("ComplexStruct", structWithList);

    Document simpleStruct = new Document();
    simpleStruct.put("SomeSimpleStruct", "someSimpleStruct");
    result.put("SimpleStruct", simpleStruct);

    // Stubbing used while the metadata handler resolves the table.
    when(mockCollection.find()).thenReturn(mockIterable);
    when(mockIterable.limit(anyInt())).thenReturn(mockIterable);
    when(mockIterable.maxScan(anyInt())).thenReturn(mockIterable);
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    GetTableRequest req = new GetTableRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME);
    GetTableResponse res = mdHandler.doGetTable(allocator, req);
    logger.info("doGetTable - {}", res);

    // Stubbing used by the record read; query and projection documents are logged as they arrive.
    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    // Re-stub with a cursor over a fresh iterator for the read call.
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            res.getSchema(),
            Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(),
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));

    // assertEquals reports the actual count on failure, unlike assertTrue(x == 1).
    assertEquals(1, response.getRecordCount());
    String expectedString = "[ComplexStruct : {[SomeList : {{[SomeSubStruct : someSubStruct1],"
            + "[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]},"
            + "{[SomeSubStruct : someSubStruct1],[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}}],"
            + "[SimpleSubStruct : {[SomeSimpleSubStruct : someSimpleSubStruct]}],"
            + "[SimpleSubStructNullList : {[SomeNullList : null]}]}], [SimpleStruct : {[SomeSimpleStruct : someSimpleStruct]}]";
    assertEquals(expectedString, BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Document(org.bson.Document) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 12 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In class DocDBRecordHandlerTest, method doReadRecordsNoSpill:

/**
 * Reads three generated documents with an equality constraint of {@code 22.0} on {@code col3}
 * and a spill threshold large enough that the response is expected to be returned inline.
 *
 * <p>Two of the three documents carry {@code col3 = 22.0}; the third carries {@code 21.0} plus a
 * field of an unsupported type. The test expects exactly two rows in the response.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    List<Document> documents = new ArrayList<>();
    int docNum = 11;

    Document doc1 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
    documents.add(doc1);
    doc1.put("col3", 22.0D);

    Document doc2 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
    documents.add(doc2);
    doc2.put("col3", 22.0D);

    Document doc3 = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum++);
    documents.add(doc3);
    doc3.put("col3", 21.0D);
    doc3.put("unsupported", new UnsupportedType());

    // The mocked collection returns the same iterable for any query; query and projection are logged.
    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(),
            ImmutableList.of(Range.equal(allocator, Types.MinorType.FLOAT8.getType(), 22.0D)), false));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            schemaForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(),
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());

    // assertEquals reports the actual row count on failure, unlike assertTrue(x == 2).
    assertEquals(2, response.getRecords().getRowCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Document(org.bson.Document) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 13 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In class MetricsRecordHandlerTest, method readMetricSamplesWithConstraint:

/**
 * Reads metric samples through the handler with equality constraints on namespace, statistic,
 * dimension name and dimension value, and checks that the number of returned rows equals
 * (calls made to the mocked getMetricData) x (metrics per call) x (samples per metric).
 */
@Test
public void readMetricSamplesWithConstraint() throws Exception {
    logger.info("readMetricSamplesWithConstraint: enter");

    final String ns = "namespace";
    final String metric = "metricName";
    final String stat = "p90";
    final String periodValue = "60";
    final String dimensionName = "dimName";
    final String dimensionValue = "dimValue";
    final List<Dimension> dims = Collections.singletonList(
            new Dimension().withName(dimensionName).withValue(dimensionValue));

    final int metricsPerCall = 10;
    final int samplesPerMetric = 10;

    // Count every call to the mocked CloudWatch client so the expected row total can be derived.
    final AtomicLong callCounter = new AtomicLong(0);
    when(mockMetrics.getMetricData(any(GetMetricDataRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        callCounter.incrementAndGet();
        return mockMetricData(invocation, metricsPerCall, samplesPerMetric);
    });

    Map<String, ValueSet> constraints = new HashMap<>();
    constraints.put(NAMESPACE_FIELD, makeStringEquals(allocator, ns));
    constraints.put(STATISTIC_FIELD, makeStringEquals(allocator, stat));
    constraints.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimensionName));
    constraints.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimensionValue));

    S3SpillLocation spill = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    // The split carries the serialized metric stats alongside the plain metric properties.
    List<MetricStat> stats = new ArrayList<>();
    stats.add(new MetricStat()
            .withMetric(new Metric()
                    .withNamespace(ns)
                    .withMetricName(metric)
                    .withDimensions(dims))
            .withPeriod(60)
            .withStat(stat));

    Split split = Split.newBuilder(spill, keyFactory.create())
            .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, MetricStatSerDe.serialize(stats))
            .add(METRIC_NAME_FIELD, metric)
            .add(NAMESPACE_FIELD, ns)
            .add(STATISTIC_FIELD, stat)
            .add(PERIOD_FIELD, periodValue)
            .build();

    ReadRecordsRequest readRequest = new ReadRecordsRequest(identity,
            "catalog",
            "queryId-" + System.currentTimeMillis(),
            METRIC_SAMPLES_TABLE_NAME,
            METRIC_DATA_TABLE.getSchema(),
            split,
            new Constraints(constraints),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse raw = handler.doReadRecords(allocator, readRequest);
    assertTrue(raw instanceof ReadRecordsResponse);

    ReadRecordsResponse readResponse = (ReadRecordsResponse) raw;
    logger.info("readMetricSamplesWithConstraint: rows[{}]", readResponse.getRecordCount());

    assertEquals(callCounter.get() * metricsPerCall * samplesPerMetric, readResponse.getRecords().getRowCount());
    logger.info("readMetricSamplesWithConstraint: {}", BlockUtils.rowToString(readResponse.getRecords(), 0));

    logger.info("readMetricSamplesWithConstraint: exit");
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) MetricStat(com.amazonaws.services.cloudwatch.model.MetricStat) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) Dimension(com.amazonaws.services.cloudwatch.model.Dimension) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetMetricDataRequest(com.amazonaws.services.cloudwatch.model.GetMetricDataRequest) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 14 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In class DynamoDBRecordHandlerTest, method testReadScanSplit:

/**
 * Issues a read over a single scan segment (segment id "0" of 1) with no constraints and
 * expects 1000 rows back in an inline (non-spilled) response.
 */
@Test
public void testReadScanSplit() throws Exception {
    // too big to spill
    final long spillThreshold = 100_000_000_000L;

    Split scanSplit = Split.newBuilder(SPILL_LOCATION, keyFactory.create())
            .add(TABLE_METADATA, TEST_TABLE)
            .add(SEGMENT_ID_PROPERTY, "0")
            .add(SEGMENT_COUNT_METADATA, "1")
            .build();

    ReadRecordsRequest readRequest = new ReadRecordsRequest(TEST_IDENTITY,
            TEST_CATALOG_NAME,
            TEST_QUERY_ID,
            TEST_TABLE_NAME,
            schema,
            scanSplit,
            new Constraints(ImmutableMap.of()),
            spillThreshold,
            spillThreshold);

    RecordResponse raw = handler.doReadRecords(allocator, readRequest);
    assertTrue(raw instanceof ReadRecordsResponse);

    ReadRecordsResponse readResponse = (ReadRecordsResponse) raw;
    logger.info("testReadScanSplit: rows[{}]", readResponse.getRecordCount());

    assertEquals(1000, readResponse.getRecords().getRowCount());
    logger.info("testReadScanSplit: {}", BlockUtils.rowToString(readResponse.getRecords(), 0));
}
Also used : ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Example 15 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

In class ElasticsearchRecordHandlerTest, method doReadRecordsNoSpill:

/**
 * Reads two stubbed search hits with a range constraint on {@code myshort} and verifies the
 * projection and predicate captured from the SearchRequest passed to the client, then the
 * type and row count of the response.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");

    // Two hits are returned by the mocked search response.
    SearchHit[] hits = new SearchHit[] {new SearchHit(1), new SearchHit(2)};
    SearchHits stubbedHits = new SearchHits(hits, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 4);
    when(mockResponse.getHits()).thenReturn(stubbedHits);

    // Constraint: 1955 < myshort <= 1972.
    Map<String, ValueSet> constraints = new HashMap<>();
    constraints.put("myshort", SortedRangeSet.copyOf(Types.MinorType.SMALLINT.getType(),
            ImmutableList.of(Range.range(allocator, Types.MinorType.SMALLINT.getType(),
                    (short) 1955, false, (short) 1972, true)),
            false));

    // The expected projection is every field name in the mapping, in mapping order.
    List<String> projectionWanted = new ArrayList<>();
    mapping.getFields().forEach(mappedField -> {
        projectionWanted.add(mappedField.getName());
    });
    String predicateWanted = "(_exists_:myshort) AND myshort:({1955 TO 1972])";

    ReadRecordsRequest readRequest = new ReadRecordsRequest(fakeIdentity(),
            "elasticsearch",
            "queryId-" + System.currentTimeMillis(),
            new TableName("movies", "mishmash"),
            mapping,
            split,
            new Constraints(constraints),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse raw = handler.doReadRecords(allocator, readRequest);

    // Capture the SearchRequest handed to client.getDocuments(); it holds the projection and predicate.
    ArgumentCaptor<SearchRequest> requestCaptor = ArgumentCaptor.forClass(SearchRequest.class);
    verify(mockClient).getDocuments(requestCaptor.capture());
    SearchRequest captured = requestCaptor.getValue();

    // Compare the actual projection to the expected one.
    List<String> projectionSeen = ImmutableList.copyOf(captured.source().fetchSource().includes());
    assertEquals("Projections do not match", projectionWanted, projectionSeen);

    // Compare the actual predicate to the expected one.
    String predicateSeen = captured.source().query().queryName();
    assertEquals("Predicates do not match", predicateWanted, predicateSeen);

    assertTrue(raw instanceof ReadRecordsResponse);
    ReadRecordsResponse readResponse = (ReadRecordsResponse) raw;
    logger.info("doReadRecordsNoSpill: rows[{}]", readResponse.getRecordCount());

    assertEquals(2, readResponse.getRecords().getRowCount());

    int rowCount = readResponse.getRecords().getRowCount();
    for (int row = 0; row < rowCount; row++) {
        logger.info("doReadRecordsNoSpill - Row: {}, {}", row, BlockUtils.rowToString(readResponse.getRecords(), row));
    }

    logger.info("doReadRecordsNoSpill: exit");
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Mockito.anyString(org.mockito.Mockito.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SearchHits(org.elasticsearch.search.SearchHits) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Aggregations

ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)33 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)26 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)26 Test (org.junit.Test)25 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)16 HashMap (java.util.HashMap)16 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)15 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)15 ArrayList (java.util.ArrayList)15 Matchers.anyString (org.mockito.Matchers.anyString)15 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)12 Schema (org.apache.arrow.vector.types.pojo.Schema)11 InvocationOnMock (org.mockito.invocation.InvocationOnMock)11 Block (com.amazonaws.athena.connector.lambda.data.Block)7 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)6 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)6 InputStream (java.io.InputStream)6 GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest)5