Search in sources :

Example 21 with RecordResponse

use of com.amazonaws.athena.connector.lambda.records.RecordResponse in project foundry-athena-query-federation-connector by palantir.

the class FoundryRecordHandlerTest method testReadRecordsMultiple.

/**
 * Reads a slice whose Arrow stream carries two record batches and checks that the handler
 * answers with a {@code RemoteReadRecordsResponse}, leaving exactly two blobs in the mocked
 * S3 store whose contents match the two input batches.
 */
@Test
void testReadRecordsMultiple() throws Exception {
    mockS3();

    // Two independent batches written into a single Arrow stream.
    List<List<Object>> firstBatchData = ImmutableList.of(
            ImmutableList.of(1, 2),
            ImmutableList.of(1.1, 2.2));
    List<List<Object>> secondBatchData = ImmutableList.of(
            ImmutableList.of(3, 4),
            ImmutableList.of(3.3, 4.4));
    InputStream arrowStream = ArrowUtils.writeToStream(
            TestConstants.SCHEMA,
            ImmutableList.of(
                    ArrowUtils.createBatch(TestConstants.SCHEMA, firstBatchData),
                    ArrowUtils.createBatch(TestConstants.SCHEMA, secondBatchData)));

    when(readRecordsRequest.getCatalogName()).thenReturn("catalog");
    when(recordService.fetchSlice(any(), any())).thenReturn(arrowStream);

    RecordResponse response = handler.doReadRecords(allocator, readRecordsRequest);

    assetRoundTripSerializable(response);
    assertThat(response).isInstanceOf(RemoteReadRecordsResponse.class);

    // One spilled blob is expected per batch.
    // NOTE(review): the index-based checks below rely on s3.values() iterating in write order - confirm.
    assertThat(s3).hasSize(2);
    List<byte[]> spilledBlobs = new ArrayList<>(s3.values());
    assertThatSpillLocationContainsData(spilledBlobs.get(0), firstBatchData);
    assertThatSpillLocationContainsData(spilledBlobs.get(1), secondBatchData);
}
Also used: InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Test(org.junit.jupiter.api.Test)

Example 22 with RecordResponse

use of com.amazonaws.athena.connector.lambda.records.RecordResponse in project foundry-athena-query-federation-connector by palantir.

the class FoundryRecordHandlerTest method testReadRecordsSingle.

/**
 * Reads a slice whose Arrow stream carries a single record batch and checks that the handler
 * answers with an inline {@code ReadRecordsResponse} holding the same data, without writing
 * anything to the mocked S3 store.
 */
@Test
void testReadRecordsSingle() throws Exception {
    List<List<Object>> data = ImmutableList.of(
            ImmutableList.of(1, 2),
            ImmutableList.of(1.1, 2.2));
    InputStream arrowStream = ArrowUtils.writeToStream(
            TestConstants.SCHEMA,
            ImmutableList.of(ArrowUtils.createBatch(TestConstants.SCHEMA, data)));

    when(readRecordsRequest.getCatalogName()).thenReturn("catalog");
    when(recordService.fetchSlice(any(), any())).thenReturn(arrowStream);

    RecordResponse response = handler.doReadRecords(allocator, readRecordsRequest);

    assetRoundTripSerializable(response);
    assertThat(response).isInstanceOf(ReadRecordsResponse.class);

    // The records must come back inline and match the input exactly.
    assertThat(TestUtils.readBlockDataAsColumns(
                    TestConstants.SCHEMA,
                    ((ReadRecordsResponse) response).getRecords()))
            .containsExactlyElementsOf(data);

    // Nothing may have been spilled.
    assertThat(s3).isEmpty();
}
Also used : InputStream(java.io.InputStream) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Test(org.junit.jupiter.api.Test)

Example 23 with RecordResponse

use of com.amazonaws.athena.connector.lambda.records.RecordResponse in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandlerTest method doReadRecordsSpill.

/**
 * Issues a read request with small block-size limits, once with an encryption key and once
 * without, and checks that each response is a {@code RemoteReadRecordsResponse} with more
 * than one block, every one of which can be read back through {@code spillReader}.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    logger.info("doReadRecordsSpill: enter");
    for (int pass = 0; pass < 2; pass++) {
        // Even passes use a freshly created key; odd passes run without one.
        EncryptionKey encryptionKey;
        if (pass % 2 == 0) {
            encryptionKey = keyFactory.create();
        } else {
            encryptionKey = null;
        }
        logger.info("doReadRecordsSpill: Using encryptionKey[" + encryptionKey + "]");

        Map<String, ValueSet> constraintsMap = new HashMap<>();
        constraintsMap.put(
                "col3",
                SortedRangeSet.copyOf(
                        Types.MinorType.FLOAT8.getType(),
                        ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)),
                        false));
        constraintsMap.put(
                "unknown",
                EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
                        .add(1.1D)
                        .build());
        constraintsMap.put(
                "unknown2",
                new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));

        ReadRecordsRequest request = new ReadRecordsRequest(
                IdentityUtil.fakeIdentity(),
                "catalog",
                "queryId-" + System.currentTimeMillis(),
                new TableName("schema", "table"),
                schemaForRead,
                Split.newBuilder(makeSpillLocation(), encryptionKey)
                        .add("year", "10")
                        .add("month", "10")
                        .add("day", "10")
                        .build(),
                new Constraints(constraintsMap),
                // ~1.5MB so we should see some spill
                1_600_000L,
                1000L);
        ObjectMapperUtil.assertSerialization(request);

        RecordResponse rawResponse = recordService.readRecords(request);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

        try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
            logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
            assertTrue(response.getNumberBlocks() > 1);

            int blockNum = 0;
            for (SpillLocation next : response.getRemoteBlocks()) {
                try (Block block = spillReader.read(
                        (S3SpillLocation) next, response.getEncryptionKey(), response.getSchema())) {
                    logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                    blockNum++;
                    // NOTE(review): a stricter per-block row-count assertion was disabled here; only
                    // readability of the first row is verified.
                    logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                    assertNotNull(BlockUtils.rowToString(block, 0));
                }
            }
        }
    }
    logger.info("doReadRecordsSpill: exit");
}
Also used: RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) EncryptionKey(com.amazonaws.athena.connector.lambda.security.EncryptionKey) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 24 with RecordResponse

use of com.amazonaws.athena.connector.lambda.records.RecordResponse in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandlerTest method doReadRecordsSpill.

/**
 * Feeds 10,000 mocked HBase results through the handler with a small block-size limit and
 * checks that the response is a {@code RemoteReadRecordsResponse} with more than one block,
 * every one of which can be read back through {@code spillReader}.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(10_000);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());

    // Route every table scan to the caller-supplied processor (third argument), backed by the mock scanner.
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer(invocation -> ((ResultProcessor) invocation.getArguments()[2]).scan(mockScanner));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "family1:col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.BIGINT.getType(),
                    ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)),
                    true));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            try (Block block = spillReader.read(
                    (S3SpillLocation) next, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;
                // NOTE(review): a stricter per-block row-count assertion was disabled here; only
                // readability of the first row is verified.
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used: ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 25 with RecordResponse

use of com.amazonaws.athena.connector.lambda.records.RecordResponse in project aws-athena-query-federation by awslabs.

the class RedisRecordHandlerTest method doReadRecordsHash.

/**
 * Reads HASH-valued keys selected by prefix through mocked Redis commands and checks that the
 * inline {@code ReadRecordsResponse} has five rows, the same number of fields as the requested
 * schema, and non-null values in the key, {@code intcol} and {@code stringcol} columns of row 0.
 */
@Test
public void doReadRecordsHash() throws Exception {
    // Key scan is served in two pages: 5 random keys first (next cursor "1"), then 2 more
    // on a final page (cursor "0", marked finished).
    when(mockSyncCommands.scan(any(ScanCursor.class), any(ScanArgs.class))).then(invocation -> {
        ScanCursor requested = (ScanCursor) invocation.getArguments()[0];
        boolean firstPage = requested == null || requested.getCursor().equals("0");

        int keyCount = firstPage ? 5 : 2;
        List<String> keys = new ArrayList<>();
        for (int k = 0; k < keyCount; k++) {
            keys.add(UUID.randomUUID().toString());
        }

        MockKeyScanCursor<String> page = new MockKeyScanCursor<>();
        page.setCursor(firstPage ? "1" : "0");
        page.setKeys(keys);
        if (!firstPage) {
            page.setFinished(true);
        }
        return page;
    });

    // Each hash lookup yields 3 fields; intcol counts up from 0 across calls.
    AtomicLong intColVal = new AtomicLong(0);
    when(mockSyncCommands.hgetall(anyString())).then(invocation -> {
        Map<String, String> hash = new HashMap<>();
        hash.put("intcol", String.valueOf(intColVal.getAndIncrement()));
        hash.put("stringcol", UUID.randomUUID().toString());
        hash.put("extracol", UUID.randomUUID().toString());
        return hash;
    });

    AtomicLong value = new AtomicLong(0);
    when(mockSyncCommands.get(anyString()))
            .thenAnswer(invocation -> String.valueOf(value.getAndIncrement()));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split split = Split.newBuilder(splitLoc, keyFactory.create())
            .add(REDIS_ENDPOINT_PROP, endpoint)
            .add(KEY_TYPE, KeyType.PREFIX.getId())
            .add(KEY_PREFIX_TABLE_PROP, "key-*")
            .add(VALUE_TYPE_TABLE_PROP, ValueType.HASH.getId())
            .build();

    // "extracol" is deliberately absent from the requested schema.
    Schema schemaForRead = SchemaBuilder.newBuilder()
            .addField("_key_", Types.MinorType.VARCHAR.getType())
            .addField("intcol", Types.MinorType.INT.getType())
            .addField("stringcol", Types.MinorType.VARCHAR.getType())
            .build();

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "intcol",
            SortedRangeSet.copyOf(
                    Types.MinorType.INT.getType(),
                    ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 1)),
                    false));

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            TABLE_NAME,
            schemaForRead,
            split,
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsHash: rows[{}]", response.getRecordCount());
    logger.info("doReadRecordsHash: {}", BlockUtils.rowToString(response.getRecords(), 0));

    // Presumably 7 keys with intcol 0..6, of which intcol > 1 keeps 5 - assumes one hgetall per key; confirm.
    assertTrue(response.getRecords().getRowCount() == 5);
    assertTrue(response.getRecords().getFields().size() == schemaForRead.getFields().size());

    FieldReader keyReader = response.getRecords().getFieldReader(KEY_COLUMN_NAME);
    keyReader.setPosition(0);
    assertNotNull(keyReader.readText());

    FieldReader intCol = response.getRecords().getFieldReader("intcol");
    intCol.setPosition(0);
    assertNotNull(intCol.readInteger());

    FieldReader stringCol = response.getRecords().getFieldReader("stringcol");
    stringCol.setPosition(0);
    assertNotNull(stringCol.readText());
}
Also used: HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) ScanArgs(io.lettuce.core.ScanArgs) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ScanCursor(io.lettuce.core.ScanCursor) MockScoredValueScanCursor(com.amazonaws.athena.connectors.redis.util.MockScoredValueScanCursor) MockKeyScanCursor(com.amazonaws.athena.connectors.redis.util.MockKeyScanCursor) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) Test(org.junit.Test)

Aggregations

RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)37 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)33 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)33 Test (org.junit.Test)32 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)27 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)24 HashMap (java.util.HashMap)24 Matchers.anyString (org.mockito.Matchers.anyString)22 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)21 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)20 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)19 Split (com.amazonaws.athena.connector.lambda.domain.Split)18 ArrayList (java.util.ArrayList)18 InvocationOnMock (org.mockito.invocation.InvocationOnMock)14 Block (com.amazonaws.athena.connector.lambda.data.Block)13 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)11 Schema (org.apache.arrow.vector.types.pojo.Schema)10 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)9 InputStream (java.io.InputStream)7 List (java.util.List)7