Search in sources :

Example 71 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class DocDBRecordHandlerTest method doReadRecordsNoSpill.

/**
 * Feeds three stubbed documents to the handler with an equality constraint of 22.0 on
 * {@code col3} and spill limits large enough that no spill is expected.
 *
 * <p>Two of the documents carry {@code col3 = 22.0}; the third carries {@code col3 = 21.0}
 * plus a field holding an {@code UnsupportedType} value. The test asserts that the response
 * is an inline {@code ReadRecordsResponse} containing exactly two rows.
 *
 * @throws Exception if the handler or any fixture throws
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    // Build the three source documents; the seed increases by one per document.
    int seed = 11;
    List<Document> stubbedDocs = new ArrayList<>();

    Document firstMatch = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), seed++);
    firstMatch.put("col3", 22.0D);
    stubbedDocs.add(firstMatch);

    Document secondMatch = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), seed++);
    secondMatch.put("col3", 22.0D);
    stubbedDocs.add(secondMatch);

    Document nonMatch = DocumentGenerator.makeRandomRow(schemaForRead.getFields(), seed++);
    nonMatch.put("col3", 21.0D);
    nonMatch.put("unsupported", new UnsupportedType());
    stubbedDocs.add(nonMatch);

    // Every stage of the mocked find/projection/batchSize chain hands back the same iterable,
    // logging the query and projection documents it was given.
    when(mockCollection.find(any(Document.class))).thenAnswer(invocation -> {
        logger.info("doReadRecordsNoSpill: query[{}]", invocation.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer(invocation -> {
        logger.info("doReadRecordsNoSpill: projection[{}]", invocation.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(stubbedDocs.iterator()));

    // Constraint: col3 == 22.0
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.FLOAT8.getType(),
                    ImmutableList.of(Range.equal(allocator, Types.MinorType.FLOAT8.getType(), 22.0D)),
                    false));

    S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    String queryId = "queryId-" + System.currentTimeMillis();
    Split split = Split.newBuilder(spillLocation, keyFactory.create())
            .add(DOCDB_CONN_STR, CONNECTION_STRING)
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            queryId,
            TABLE_NAME,
            schemaForRead,
            split,
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", inlineResponse.getRecordCount());
    assertTrue(inlineResponse.getRecords().getRowCount() == 2);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(inlineResponse.getRecords(), 0));
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Document(org.bson.Document) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 72 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class DocDBRecordHandlerTest method doReadRecordsSpill.

/**
 * Feeds 20,000 generated documents to the handler with a {@code col3 > -10000} constraint and
 * small spill limits, then verifies the response spilled into more than one remote block and
 * that each spilled block can be read back.
 *
 * @throws Exception if the handler, the spill reader, or any fixture throws
 */
@Test
public void doReadRecordsSpill() throws Exception {
    List<Document> documents = new ArrayList<>();
    for (int docNum = 0; docNum < 20_000; docNum++) {
        documents.add(DocumentGenerator.makeRandomRow(schemaForRead.getFields(), docNum));
    }

    // Every stage of the mocked find/projection/batchSize chain hands back the same iterable.
    // The log labels name this test (they previously carried the copy-pasted
    // "doReadRecordsNoSpill" label, which mislabeled the output).
    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    // Constraint: col3 > -10000
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            schemaForRead,
            Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            // Read each spilled block back and check that its first row can be rendered.
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Document(org.bson.Document) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 73 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class CloudwatchRecordHandlerTest method doReadRecordsSpill.

/**
 * Issues a read with an inclusive {@code time} range of [100, 100_000_000] and small spill
 * limits, then verifies the response spilled into more than one remote block and that each
 * spilled block can be read back.
 *
 * @throws Exception if the handler, the spill reader, or any fixture throws
 */
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");

    // Constraint: 100 <= time <= 100_000_000
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "time",
            SortedRangeSet.of(
                    Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));

    String queryId = "queryId-" + System.currentTimeMillis();
    TableName tableName = new TableName("schema", "table");
    S3SpillLocation requestSpillLocation = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();
    Split split = Split.newBuilder(requestSpillLocation, keyFactory.create())
            .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
            .build();

    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            queryId,
            tableName,
            schemaForRead,
            split,
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse spilled = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", spilled.getRemoteBlocks().size());
        assertTrue(spilled.getNumberBlocks() > 1);

        int blockIndex = 0;
        for (SpillLocation location : spilled.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            // Read each spilled block back and check that its first row can be rendered.
            try (Block spilledBlock = spillReader.read(s3Location, spilled.getEncryptionKey(), spilled.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockIndex++, spilledBlock.getRowCount());
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(spilledBlock, 0));
                assertNotNull(BlockUtils.rowToString(spilledBlock, 0));
            }
        }
    }

    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 74 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricsSplits.

/**
 * Requests splits for the {@code metrics} table with a single-row partition block and follows
 * continuation tokens until none is returned.
 *
 * <p>Each response must be of type {@code GET_SPLITS} and contain exactly one split, and the
 * test asserts that no continuation token is ever returned.
 *
 * @throws Exception if the handler or any fixture throws
 */
@Test
public void doGetMetricsSplits() throws Exception {
    logger.info("doGetMetricsSplits: enter");

    Schema partitionSchema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitionBlock = allocator.createBlock(partitionSchema);
    // NOTE(review): the value is written at position 1 while the row count is set to 1 below;
    // confirm whether position 0 was intended.
    BlockUtils.setValue(partitionBlock.getFieldVector("partitionId"), 1, 1);
    partitionBlock.setRowCount(1);

    String token = null;
    GetSplitsRequest baseRequest = new GetSplitsRequest(
            identity,
            "queryId",
            "catalog_name",
            new TableName(defaultSchema, "metrics"),
            partitionBlock,
            Collections.singletonList("partitionId"),
            new Constraints(new HashMap<>()),
            token);

    int continuationCount = 0;
    while (true) {
        GetSplitsRequest pagedRequest = new GetSplitsRequest(baseRequest, token);
        logger.info("doGetMetricsSplits: req[{}]", pagedRequest);

        MetadataResponse rawResponse = handler.doGetSplits(allocator, pagedRequest);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

        GetSplitsResponse splitsResponse = (GetSplitsResponse) rawResponse;
        token = splitsResponse.getContinuationToken();
        logger.info("doGetMetricsSplits: continuationToken[{}] - numSplits[{}]", token, splitsResponse.getSplits().size());
        assertEquals(1, splitsResponse.getSplits().size());

        // A null token means there are no further pages to request.
        if (token == null) {
            break;
        }
        continuationCount++;
    }

    assertEquals(0, continuationCount);
    logger.info("doGetMetricsSplits: exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)

Example 75 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetTableLayout.

/**
 * Requests the table layout for the {@code metrics} table with a constraint on
 * {@code METRIC_NAME_FIELD} containing the value "MyMetric", and asserts that the returned
 * partition block holds exactly one row.
 *
 * @throws Exception if the handler or any fixture throws
 */
@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            METRIC_NAME_FIELD,
            EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
                    .add("MyMetric")
                    .build());

    // Collections.emptySet() is the type-safe replacement for the raw Collections.EMPTY_SET
    // constant and avoids an unchecked-conversion warning.
    GetTableLayoutRequest req = new GetTableLayoutRequest(
            identity,
            "queryId",
            "default",
            new TableName(defaultSchema, "metrics"),
            new Constraints(constraintsMap),
            SchemaBuilder.newBuilder().build(),
            Collections.emptySet());

    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout - {}", res.getPartitions());

    // assertEquals reports the actual row count on failure, unlike assertTrue(x == 1).
    assertEquals(1, res.getPartitions().getRowCount());
    logger.info("doGetTableLayout - exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) HashMap(java.util.HashMap) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 182, Test (org.junit.Test) 172, TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 148, Schema (org.apache.arrow.vector.types.pojo.Schema) 136, ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 64, HashMap (java.util.HashMap) 63, Split (com.amazonaws.athena.connector.lambda.domain.Split) 59, ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) 55, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 54, BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator) 50, GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) 47, BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) 44, ArrayList (java.util.ArrayList) 44, PreparedStatement (java.sql.PreparedStatement) 42, GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) 41, Block (com.amazonaws.athena.connector.lambda.data.Block) 36, SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder) 35, RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse) 33, ResultSet (java.sql.ResultSet) 32, GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) 30