Search in sources :

Example 16 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

From the class DynamoDBRecordHandlerTest — method testReadQuerySplit:

@Test
public void testReadQuerySplit() throws Exception {
    // Alias map and bound value backing the range-key filter expression "#col_1 >= :v0".
    Map<String, String> filterAliases = ImmutableMap.of("#col_1", "col_1");
    Map<String, AttributeValue> filterValues = ImmutableMap.of(":v0", toAttributeValue(1));
    // Build a Query-style split: a fixed hash key plus a range-key filter condition.
    Split querySplit = Split.newBuilder(SPILL_LOCATION, keyFactory.create())
            .add(TABLE_METADATA, TEST_TABLE)
            .add(HASH_KEY_NAME_METADATA, "col_0")
            .add("col_0", toJsonString(toAttributeValue("test_str_0")))
            .add(RANGE_KEY_FILTER_METADATA, "#col_1 >= :v0")
            .add(EXPRESSION_NAMES_METADATA, toJsonString(filterAliases))
            .add(EXPRESSION_VALUES_METADATA, toJsonString(filterValues))
            .build();
    // Spill thresholds are set far above any realistic result size, so the
    // handler is forced to answer inline with a ReadRecordsResponse.
    ReadRecordsRequest readRequest = new ReadRecordsRequest(TEST_IDENTITY, TEST_CATALOG_NAME, TEST_QUERY_ID, TEST_TABLE_NAME, schema, querySplit, new Constraints(ImmutableMap.of()), // too big to spill
    100_000_000_000L, 100_000_000_000L);
    RecordResponse rawResult = handler.doReadRecords(allocator, readRequest);
    assertTrue(rawResult instanceof ReadRecordsResponse);
    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResult;
    logger.info("testReadQuerySplit: rows[{}]", inlineResponse.getRecordCount());
    // The filter "#col_1 >= :v0" with :v0 = 1 is expected to match exactly two rows.
    assertEquals(2, inlineResponse.getRecords().getRowCount());
    logger.info("testReadQuerySplit: {}", BlockUtils.rowToString(inlineResponse.getRecords(), 0));
}
Also used : ItemUtils.toAttributeValue(com.amazonaws.services.dynamodbv2.document.ItemUtils.toAttributeValue) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Jackson.toJsonString(com.amazonaws.util.json.Jackson.toJsonString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Example 17 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

From the class ConnectorValidator — method readRecords:

/**
 * Reads records from one randomly selected split of the given table and logs a summary
 * (row count, discovered columns, and the first row) for validation purposes.
 *
 * @param testConfig supplies the constraints, catalog id, record Lambda function, and identity
 * @param table the table being validated
 * @param schema the table's schema, used both to parse constraints and to read results
 * @param splits the candidate splits; one is chosen at random
 * @return the response for the tested split, guaranteed to contain at least one record
 * @throws IllegalStateException if the tested split returns zero rows
 */
private static ReadRecordsResponse readRecords(TestConfig testConfig, TableName table, Schema schema, Collection<Split> splits) {
    Constraints constraints = parseConstraints(schema, testConfig.getConstraints());
    Split split = getRandomElement(splits);
    log.info("Executing randomly selected split with properties: {}", split.getProperties());
    ReadRecordsResponse records = LambdaRecordProvider.readRecords(testConfig.getCatalogId(), table, constraints, schema, split, testConfig.getRecordFunction(), testConfig.getIdentity());
    log.info("Received " + records.getRecordCount() + " records.");
    // Fails fast with IllegalStateException when the split returned no rows, so every
    // statement below may rely on at least one record being present. (The original code
    // additionally checked getRecordCount() == 0 after this point; that branch was
    // unreachable and has been removed.)
    checkState(records.getRecordCount() > 0, "Table " + toQualifiedTableName(table) + " did not return any rows in the tested split, even though an empty constraint was used." + " This can happen if the table is empty but could also indicate an issue." + " Please populate the table or specify a different table.");
    log.info("Discovered columns: " + records.getSchema().getFields().stream().map(f -> f.getName() + ":" + f.getType().getTypeID()).collect(Collectors.toList()));
    // Safe: checkState above guarantees row 0 exists.
    log.info("First row of split: " + rowToString(records.getRecords(), 0));
    return records;
}
Also used : GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) Options(org.apache.commons.cli.Options) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) HelpFormatter(org.apache.commons.cli.HelpFormatter) DefaultParser(org.apache.commons.cli.DefaultParser) ArrayList(java.util.ArrayList) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Objects.requireNonNull(java.util.Objects.requireNonNull) CommandLine(org.apache.commons.cli.CommandLine) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) ListTablesResponse(com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse) ListSchemasResponse(com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Collection(java.util.Collection) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Set(java.util.Set) BlockUtils.rowToString(com.amazonaws.athena.connector.lambda.data.BlockUtils.rowToString) Field(org.apache.arrow.vector.types.pojo.Field) Collectors(java.util.stream.Collectors) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Sets(com.google.common.collect.Sets) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) List(java.util.List) ConstraintParser.parseConstraints(com.amazonaws.athena.connector.validation.ConstraintParser.parseConstraints) ParseException(org.apache.commons.cli.ParseException) Optional(java.util.Optional) 
Collections(java.util.Collections) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) ConstraintParser.parseConstraints(com.amazonaws.athena.connector.validation.ConstraintParser.parseConstraints) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split)

Example 18 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.

From the class FoundryRecordHandlerTest — method testReadRecordsSingle:

@Test
void testReadRecordsSingle() throws Exception {
    when(readRecordsRequest.getCatalogName()).thenReturn("catalog");
    // Two columns of two rows each, serialized as a single Arrow batch.
    List<List<Object>> columnData = ImmutableList.of(ImmutableList.of(1, 2), ImmutableList.of(1.1, 2.2));
    InputStream sliceStream = ArrowUtils.writeToStream(TestConstants.SCHEMA, ImmutableList.of(ArrowUtils.createBatch(TestConstants.SCHEMA, columnData)));
    when(recordService.fetchSlice(any(), any())).thenReturn(sliceStream);
    RecordResponse rawResponse = handler.doReadRecords(allocator, readRecordsRequest);
    assetRoundTripSerializable(rawResponse);
    // A single small batch should be returned inline, never spilled to S3.
    assertThat(rawResponse).isInstanceOf(ReadRecordsResponse.class);
    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResponse;
    assertThat(TestUtils.readBlockDataAsColumns(TestConstants.SCHEMA, inlineResponse.getRecords())).containsExactlyElementsOf(columnData);
    assertThat(s3).isEmpty();
}
Also used : InputStream(java.io.InputStream) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Test(org.junit.jupiter.api.Test)

Example 19 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project foundry-athena-query-federation-connector by palantir.

From the class FoundryRecordHandler — method doReadRecords:

/**
 * Reads a slice of records from the Foundry record service and returns them either inline
 * (a {@link ReadRecordsResponse} holding one Block) or spilled to S3
 * (a {@link RemoteReadRecordsResponse} listing spill locations), depending on how many
 * batches the service returns and whether the first batch fits under the inline size limit.
 *
 * @param allocator allocator used to create the temporary block and back the Arrow reader
 * @param request carries the schema, split, constraints, and spill configuration
 * @return an inline or remote records response, per the spill decision above
 * @throws Exception propagated from the record service call or Arrow stream reading
 */
@Override
@SuppressWarnings("MustBeClosedChecker")
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    log.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    log.debug("Reading records with constraints: {}", request.getConstraints());
    SpillConfig spillConfig = getSpillConfig(request);
    S3Spiller spiller = new S3Spiller(amazonS3, spillConfig, allocator);
    List<String> columnNames = request.getSchema().getFields().stream().map(Field::getName).collect(Collectors.toList());
    // create a temporary block to obtain a handle to the BufferAllocator and allocator id
    BufferAllocator bufferAllocator;
    String allocatorId;
    try (Block block = allocator.createBlock(request.getSchema())) {
        bufferAllocator = block.getFieldVectors().get(0).getAllocator();
        allocatorId = block.getAllocatorId();
    }
    throttlingInvoker.setBlockSpiller(spiller);
    // The fetchSlice call is wrapped in throttlingInvoker so throttling can react to
    // spill pressure; the returned stream is closed by this try-with-resources.
    try (QueryStatusChecker queryStatusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId());
        InputStream is = throttlingInvoker.invoke(() -> recordService.fetchSlice(foundryAuthProvider.getAuthHeader(), FetchSliceRequest.builder().slice(Slices.INSTANCE.fromSplit(request.getSplit())).columnNames(columnNames).maxBatchSize(SafeLong.of(spillConfig.getMaxBlockBytes())).build()))) {
        // we do not auto-close the reader to avoid releasing the buffers before serialization in the case
        // the block is held in memory
        PeekableArrowStreamReader reader = new PeekableArrowStreamReader(is, bufferAllocator);
        VectorSchemaRoot vectorSchemaRoot = reader.getVectorSchemaRoot();
        Block block = new Block(allocatorId, request.getSchema(), vectorSchemaRoot);
        // Load the first batch into the block before deciding between inline and spill.
        reader.loadNextBatch();
        // spill if we have more blocks to read or the current block is too large to return
        if (reader.hasNextBatch() || block.getSize() > spillConfig.getMaxInlineBlockSize()) {
            // Spill the current batch, then keep loading and spilling batches while the
            // query is still running; each loadNextBatch() refills the same block.
            do {
                spiller.spillBlock(block);
            } while (queryStatusChecker.isQueryRunning() && reader.loadNextBatch());
            // we have spilled so we can clean up the reader
            reader.close();
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), spiller.getSpillLocations(), spillConfig.getEncryptionKey());
        } else {
            // no more batches so immediately return the block
            // NOTE(review): the reader is intentionally left open here so the block's
            // buffers survive until the response is serialized — see comment above.
            return new ReadRecordsResponse(request.getCatalogName(), block);
        }
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) InputStream(java.io.InputStream) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) BufferAllocator(org.apache.arrow.memory.BufferAllocator)

Example 20 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

From the class ReadRecordsResponseSerDeTest — method beforeTest:

@Before
public void beforeTest() throws IOException {
    // Three int32 columns modelling a simple date-partitioned table.
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .build();
    Block records = allocator.createBlock(schema);
    // Fill ten deterministic rows so the serialized form is stable across runs.
    int rowCount = 10;
    for (int row = 0; row < rowCount; row++) {
        BlockUtils.setValue(records.getFieldVector(yearCol), row, 2016 + row);
        BlockUtils.setValue(records.getFieldVector(monthCol), row, (row % 12) + 1);
        BlockUtils.setValue(records.getFieldVector(dayCol), row, (row % 28) + 1);
    }
    records.setRowCount(rowCount);
    expected = new ReadRecordsResponse("test-catalog", records);
    // Golden-file text that the serialized response is compared against.
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) Block(com.amazonaws.athena.connector.lambda.data.Block) Before(org.junit.Before)

Aggregations

ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)33 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)26 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)26 Test (org.junit.Test)25 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)16 HashMap (java.util.HashMap)16 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)15 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)15 ArrayList (java.util.ArrayList)15 Matchers.anyString (org.mockito.Matchers.anyString)15 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)12 Schema (org.apache.arrow.vector.types.pojo.Schema)11 InvocationOnMock (org.mockito.invocation.InvocationOnMock)11 Block (com.amazonaws.athena.connector.lambda.data.Block)7 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)6 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)6 InputStream (java.io.InputStream)6 GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest)5