Search in sources:

Example 1 with ResultProcessor

use of com.amazonaws.athena.connectors.hbase.connection.ResultProcessor in project aws-athena-query-federation by awslabs.

In the class HbaseRecordHandlerTest, method doReadRecordsNoSpill.

/**
 * Feeds the handler the results built by {@code TestUtils.makeResults(100)} through a mocked
 * scanner, applies an equality constraint ({@code family1:col3 == 1}) and checks that the
 * response comes back as an inline {@link ReadRecordsResponse} holding exactly one row.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    // The mocked client forwards a canned scanner to whichever ResultProcessor it is handed
    // (third argument of scanTable).
    List<Result> cannedRows = TestUtils.makeResults(100);
    ResultScanner scanner = mock(ResultScanner.class);
    when(scanner.iterator()).thenReturn(cannedRows.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocation) -> {
                ResultProcessor callback = (ResultProcessor) invocation.getArguments()[2];
                return callback.scan(scanner);
            });

    // Constraint: family1:col3 must equal 1, nulls not allowed.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    ValueSet col3EqualsOne = SortedRangeSet.copyOf(
            Types.MinorType.BIGINT.getType(),
            ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 1L)),
            false);
    constraintsMap.put("family1:col3", col3EqualsOne);

    S3SpillLocation spillTarget = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(spillTarget, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // 100GB don't expect this to spill
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", inlineResponse.getRecordCount());
    assertTrue(1 == inlineResponse.getRecords().getRowCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(inlineResponse.getRecords(), 0));
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 2 with ResultProcessor

use of com.amazonaws.athena.connectors.hbase.connection.ResultProcessor in project aws-athena-query-federation by awslabs.

In the class HbaseSchemaUtilsTest, method inferSchema.

/**
 * Runs {@code HbaseSchemaUtils.inferSchema} against a mocked connection whose scan yields the
 * results from {@code TestUtils.makeResults()}, then compares the inferred field names and minor
 * types with those of {@code TestUtils.makeSchema()}. Also verifies that the table was scanned
 * once and the scanner iterated once.
 */
@Test
public void inferSchema() throws IOException {
    int numToScan = 4;
    TableName tableName = new TableName("schema", "table");

    // The mocked connection forwards a canned scanner to the ResultProcessor passed as the
    // third argument of scanTable.
    List<Result> cannedRows = TestUtils.makeResults();
    HBaseConnection mockConnection = mock(HBaseConnection.class);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(cannedRows.iterator());
    when(mockConnection.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocation) -> {
                ResultProcessor callback = (ResultProcessor) invocation.getArguments()[2];
                return callback.scan(mockScanner);
            });

    Schema inferred = HbaseSchemaUtils.inferSchema(mockConnection, tableName, numToScan);

    // Index both schemas by field name -> minor type so they can be compared regardless of order.
    Map<String, Types.MinorType> actualFields = new HashMap<>();
    inferred.getFields().forEach(
            field -> actualFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    Map<String, Types.MinorType> expectedFields = new HashMap<>();
    TestUtils.makeSchema().build().getFields().forEach(
            field -> expectedFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    // Every expected field must be present with the same type; the field name doubles as the
    // assertion message.
    expectedFields.forEach((fieldName, expectedType) -> {
        assertNotNull(actualFields.get(fieldName));
        assertEquals(fieldName, expectedType, actualFields.get(fieldName));
    });
    assertEquals(expectedFields.size(), actualFields.size());

    verify(mockConnection, times(1)).scanTable(anyObject(), any(Scan.class), any(ResultProcessor.class));
    verify(mockScanner, times(1)).iterator();
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) Result(org.apache.hadoop.hbase.client.Result) TestUtils.makeResult(com.amazonaws.athena.connectors.hbase.TestUtils.makeResult) HBaseConnection(com.amazonaws.athena.connectors.hbase.connection.HBaseConnection) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 3 with ResultProcessor

use of com.amazonaws.athena.connectors.hbase.connection.ResultProcessor in project aws-athena-query-federation by awslabs.

In the class HbaseMetadataHandlerTest, method doGetTable.

/**
 * Calls {@code handler.doGetTable} with a mocked scan over {@code TestUtils.makeResults()} and
 * checks that the returned schema has as many fields as {@code TestUtils.makeSchema()} plus the
 * row column ({@code HbaseSchemaUtils.ROW_COLUMN_NAME}).
 *
 * TODO: Add more types.
 */
@Test
public void doGetTable() throws Exception {
    // The mocked client forwards a canned scanner to the ResultProcessor passed as the third
    // argument of scanTable.
    List<Result> cannedRows = TestUtils.makeResults();
    ResultScanner scanner = mock(ResultScanner.class);
    when(scanner.iterator()).thenReturn(cannedRows.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocation) -> {
                ResultProcessor callback = (ResultProcessor) invocation.getArguments()[2];
                return callback.scan(scanner);
            });

    GetTableRequest request = new GetTableRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME);
    GetTableResponse response = handler.doGetTable(allocator, request);
    logger.info("doGetTable - {}", response);

    Schema expectedSchema = TestUtils.makeSchema()
            .addField(HbaseSchemaUtils.ROW_COLUMN_NAME, Types.MinorType.VARCHAR.getType())
            .build();

    // Only the number of fields is compared, not their names or types.
    int expectedFieldCount = expectedSchema.getFields().size();
    int actualFieldCount = response.getSchema().getFields().size();
    assertEquals(expectedFieldCount, actualFieldCount);
}
Also used : GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Schema(org.apache.arrow.vector.types.pojo.Schema) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Result(org.apache.hadoop.hbase.client.Result) Test(org.junit.Test)

Example 4 with ResultProcessor

use of com.amazonaws.athena.connectors.hbase.connection.ResultProcessor in project aws-athena-query-federation by awslabs.

In the class HbaseRecordHandlerTest, method doReadRecordsSpill.

/**
 * Feeds the handler the results built by {@code TestUtils.makeResults(10_000)} through a mocked
 * scanner with a {@code family1:col3 > 0} constraint (nulls allowed) and small size limits, then
 * checks that the response is a {@link RemoteReadRecordsResponse} with more than one block and
 * that the first row of every spilled block can be read back and rendered.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    // The mocked client forwards a canned scanner to the ResultProcessor passed as the third
    // argument of scanTable.
    List<Result> cannedRows = TestUtils.makeResults(10_000);
    ResultScanner scanner = mock(ResultScanner.class);
    when(scanner.iterator()).thenReturn(cannedRows.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocation) -> {
                ResultProcessor callback = (ResultProcessor) invocation.getArguments()[2];
                return callback.scan(scanner);
            });

    // Constraint: family1:col3 greater than 0, nulls allowed.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    ValueSet col3Positive = SortedRangeSet.copyOf(
            Types.MinorType.BIGINT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)),
            true);
    constraintsMap.put("family1:col3", col3Positive);

    S3SpillLocation spillTarget = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(spillTarget, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            "queryId-" + System.currentTimeMillis(),
            new TableName(DEFAULT_SCHEMA, TEST_TABLE),
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse spilled = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", spilled.getRemoteBlocks().size());
        assertTrue(spilled.getNumberBlocks() > 1);

        int blockIndex = 0;
        for (SpillLocation location : spilled.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            try (Block block = spillReader.read(s3Location, spilled.getEncryptionKey(), spilled.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockIndex, block.getRowCount());
                blockIndex++;
                // No per-block row-count assertion is made; only that the first row renders.
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Aggregations

ResultProcessor (com.amazonaws.athena.connectors.hbase.connection.ResultProcessor)4 Result (org.apache.hadoop.hbase.client.Result)4 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)4 Scan (org.apache.hadoop.hbase.client.Scan)4 Test (org.junit.Test)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)3 HashMap (java.util.HashMap)3 Split (com.amazonaws.athena.connector.lambda.domain.Split)2 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)2 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)2 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)2 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)2 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)2 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)2 GetTableResult (com.amazonaws.services.glue.model.GetTableResult)2 PutObjectResult (com.amazonaws.services.s3.model.PutObjectResult)2 Schema (org.apache.arrow.vector.types.pojo.Schema)2 Matchers.anyString (org.mockito.Matchers.anyString)2 Block (com.amazonaws.athena.connector.lambda.data.Block)1