Search in sources :

Example 6 with RemoteReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.

the class FoundryRecordHandler method doReadRecords.

@Override
@SuppressWarnings("MustBeClosedChecker")
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    log.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    log.debug("Reading records with constraints: {}", request.getConstraints());
    SpillConfig spillConfig = getSpillConfig(request);
    S3Spiller spiller = new S3Spiller(amazonS3, spillConfig, allocator);
    List<String> columnNames = request.getSchema().getFields().stream().map(Field::getName).collect(Collectors.toList());
    // create a temporary block to obtain a handle to the BufferAllocator and allocator id
    BufferAllocator bufferAllocator;
    String allocatorId;
    try (Block block = allocator.createBlock(request.getSchema())) {
        bufferAllocator = block.getFieldVectors().get(0).getAllocator();
        allocatorId = block.getAllocatorId();
    }
    throttlingInvoker.setBlockSpiller(spiller);
    try (QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId());
        InputStream is = throttlingInvoker.invoke(() -> recordService.fetchSlice(foundryAuthProvider.getAuthHeader(), FetchSliceRequest.builder().slice(Slices.INSTANCE.fromSplit(request.getSplit())).columnNames(columnNames).maxBatchSize(SafeLong.of(spillConfig.getMaxBlockBytes())).build()))) {
        // we do not auto-close the reader to avoid releasing the buffers before serialization in the case
        // the block is held in memory
        PeekableArrowStreamReader reader = new PeekableArrowStreamReader(is, bufferAllocator);
        VectorSchemaRoot vectorSchemaRoot = reader.getVectorSchemaRoot();
        Block block = new Block(allocatorId, request.getSchema(), vectorSchemaRoot);
        reader.loadNextBatch();
        // spill if we have more blocks to read or the current block is too large to return
        if (reader.hasNextBatch() || block.getSize() > spillConfig.getMaxInlineBlockSize()) {
            do {
                spiller.spillBlock(block);
            } while (queryStatusChecker.isQueryRunning() && reader.loadNextBatch());
            // we have spilled so we can clean up the reader
            reader.close();
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), spiller.getSpillLocations(), spillConfig.getEncryptionKey());
        } else {
            // no more batches so immediately return the block
            return new ReadRecordsResponse(request.getCatalogName(), block);
        }
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) InputStream(java.io.InputStream) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) BufferAllocator(org.apache.arrow.memory.BufferAllocator)

Example 7 with RemoteReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    for (int i = 0; i < 2; i++) {
        EncryptionKey encryptionKey = (i % 2 == 0) ? keyFactory.create() : null;
        logger.info("doReadRecordsSpill: Using encryptionKey[" + encryptionKey + "]");
        Map<String, ValueSet> constraintsMap = new HashMap<>();
        constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
        constraintsMap.put("unknown", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true).add(1.1D).build());
        constraintsMap.put("unknown2", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
        ReadRecordsRequest request = new ReadRecordsRequest(IdentityUtil.fakeIdentity(), "catalog", "queryId-" + System.currentTimeMillis(), new TableName("schema", "table"), schemaForRead, Split.newBuilder(makeSpillLocation(), encryptionKey).add("year", "10").add("month", "10").add("day", "10").build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
        1_600_000L, 1000L);
        ObjectMapperUtil.assertSerialization(request);
        RecordResponse rawResponse = recordService.readRecords(request);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
        try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
            logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
            assertTrue(response.getNumberBlocks() > 1);
            int blockNum = 0;
            for (SpillLocation next : response.getRemoteBlocks()) {
                S3SpillLocation spillLocation = (S3SpillLocation) next;
                try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                    logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                    // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                    logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                    assertNotNull(BlockUtils.rowToString(block, 0));
                }
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Test(org.junit.Test)

Example 8 with RemoteReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(10_000);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)), true));
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create()).add(HBASE_CONN_STR, "fake_con_str").add(START_KEY_FIELD, "fake_start_key").add(END_KEY_FIELD, "fake_end_key").add(REGION_ID_FIELD, "fake_region_id").add(REGION_NAME_FIELD, "fake_region_name");
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
    1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 9 with RemoteReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class RemoteReadRecordsResponseSerDeTest method deserialize.

@Test
public void deserialize() throws IOException {
    logger.info("deserialize: enter");
    InputStream input = new ByteArrayInputStream(expectedSerDeText.getBytes());
    RemoteReadRecordsResponse actual = (RemoteReadRecordsResponse) mapper.readValue(input, FederationResponse.class);
    logger.info("deserialize: deserialized[{}]", actual);
    assertEquals(expected, actual);
    logger.info("deserialize: exit");
}
Also used : RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) FederationResponse(com.amazonaws.athena.connector.lambda.request.FederationResponse) TypedSerDeTest(com.amazonaws.athena.connector.lambda.serde.TypedSerDeTest) Test(org.junit.Test)

Example 10 with RemoteReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class TimestreamRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    String expectedQuery = "SELECT measure_name, measure_value::double, az, time, hostname, region FROM \"my_schema\".\"my_table\" WHERE (\"az\" IN ('us-east-1a','us-east-1b'))";
    QueryResult mockResult = makeMockQueryResult(schemaForRead, 100_000);
    when(mockClient.query(any(QueryRequest.class))).thenAnswer((Answer<QueryResult>) invocationOnMock -> {
        QueryRequest request = (QueryRequest) invocationOnMock.getArguments()[0];
        assertEquals(expectedQuery, request.getQueryString().replace("\n", ""));
        return mockResult;
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("az", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, true).add("us-east-1a").add("us-east-1b").build());
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create());
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
    1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Schema(org.apache.arrow.vector.types.pojo.Schema) Types(org.apache.arrow.vector.types.Types) LoggerFactory(org.slf4j.LoggerFactory) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) ByteArrayInputStream(java.io.ByteArrayInputStream) After(org.junit.After) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AmazonTimestreamQuery(com.amazonaws.services.timestreamquery.AmazonTimestreamQuery) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) UUID(java.util.UUID) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Matchers.any(org.mockito.Matchers.any) List(java.util.List) ByteStreams(com.google.common.io.ByteStreams) BlockUtils(com.amazonaws.athena.connector.lambda.data.BlockUtils) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) EncryptionKeyFactory(com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory) Mockito.mock(org.mockito.Mockito.mock) Mock(org.mockito.Mock) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3Object(com.amazonaws.services.s3.model.S3Object) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) TestName(org.junit.rules.TestName) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) Matchers.anyObject(org.mockito.Matchers.anyObject) AmazonS3(com.amazonaws.services.s3.AmazonS3) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) S3BlockSpillReader(com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader) Before(org.junit.Before) Logger(org.slf4j.Logger) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Assert.assertNotNull(org.junit.Assert.assertNotNull) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Assert.assertTrue(org.junit.Assert.assertTrue) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Rule(org.junit.Rule) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) VIEW_METADATA_FIELD(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.VIEW_METADATA_FIELD) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) TestUtils.makeMockQueryResult(com.amazonaws.athena.connectors.timestream.TestUtils.makeMockQueryResult) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)12 Block (com.amazonaws.athena.connector.lambda.data.Block)9 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)9 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)9 Test (org.junit.Test)9 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)8 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)8 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)8 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)8 HashMap (java.util.HashMap)8 Matchers.anyString (org.mockito.Matchers.anyString)7 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)6 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)4 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)3 InputStream (java.io.InputStream)3 InvocationOnMock (org.mockito.invocation.InvocationOnMock)3 QueryStatusChecker (com.amazonaws.athena.connector.lambda.QueryStatusChecker)2 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)2 SpillConfig (com.amazonaws.athena.connector.lambda.data.SpillConfig)2 Split (com.amazonaws.athena.connector.lambda.domain.Split)2