Search in sources :

Example 66 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandler method scanFilterProject.

private boolean scanFilterProject(ResultScanner scanner, ReadRecordsRequest request, BlockSpiller blockSpiller, QueryStatusChecker queryStatusChecker) {
    Schema projection = request.getSchema();
    boolean isNative = projection.getCustomMetadata().get(HBASE_NATIVE_STORAGE_FLAG) != null;
    for (Result row : scanner) {
        if (!queryStatusChecker.isQueryRunning()) {
            return true;
        }
        blockSpiller.writeRows((Block block, int rowNum) -> {
            boolean match = true;
            for (Field field : projection.getFields()) {
                if (match) {
                    match &= writeField(block, field, isNative, row, rowNum);
                }
            }
            return match ? 1 : 0;
        });
    }
    return true;
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) Schema(org.apache.arrow.vector.types.pojo.Schema) Block(com.amazonaws.athena.connector.lambda.data.Block) Result(org.apache.hadoop.hbase.client.Result)

Example 67 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class HbaseMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    GetTableLayoutRequest req = new GetTableLayoutRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, new Constraints(new HashMap<>()), SchemaBuilder.newBuilder().build(), Collections.EMPTY_SET);
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res);
    Block partitions = res.getPartitions();
    for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
    }
    assertTrue(partitions.getRowCount() > 0);
}
Also used : Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) HashMap(java.util.HashMap) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Block(com.amazonaws.athena.connector.lambda.data.Block) Test(org.junit.Test)

Example 68 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(10_000);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)), true));
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create()).add(HBASE_CONN_STR, "fake_con_str").add(START_KEY_FIELD, "fake_start_key").add(END_KEY_FIELD, "fake_end_key").add(REGION_ID_FIELD, "fake_region_id").add(REGION_NAME_FIELD, "fake_region_name");
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
    1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 69 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ReadRecordsResponseSerDeTest method beforeTest.

@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder().addField(yearCol, new ArrowType.Int(32, true)).addField(monthCol, new ArrowType.Int(32, true)).addField(dayCol, new ArrowType.Int(32, true)).build();
    Block records = allocator.createBlock(schema);
    int num_records = 10;
    for (int i = 0; i < num_records; i++) {
        BlockUtils.setValue(records.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(records.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(records.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    records.setRowCount(num_records);
    expected = new ReadRecordsResponse("test-catalog", records);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) Block(com.amazonaws.athena.connector.lambda.data.Block) Before(org.junit.Before)

Example 70 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class UserDefinedFunctionRequestSerDeTest method beforeTest.

@Before
public void beforeTest() throws IOException {
    Schema inputSchema = SchemaBuilder.newBuilder().addField("factor1", Types.MinorType.INT.getType()).addField("factor2", Types.MinorType.INT.getType()).build();
    Schema outputSchema = SchemaBuilder.newBuilder().addField("product", Types.MinorType.INT.getType()).build();
    Block inputRecords = allocator.createBlock(inputSchema);
    inputRecords.setRowCount(1);
    IntVector inputVector1 = (IntVector) inputRecords.getFieldVector("factor1");
    IntVector inputVector2 = (IntVector) inputRecords.getFieldVector("factor2");
    inputVector1.setSafe(0, 2);
    inputVector2.setSafe(0, 3);
    expected = new UserDefinedFunctionRequest(federatedIdentity, inputRecords, outputSchema, "test-method", UserDefinedFunctionType.SCALAR);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "UserDefinedFunctionRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : UserDefinedFunctionRequest(com.amazonaws.athena.connector.lambda.udf.UserDefinedFunctionRequest) IntVector(org.apache.arrow.vector.IntVector) Schema(org.apache.arrow.vector.types.pojo.Schema) Block(com.amazonaws.athena.connector.lambda.data.Block) Before(org.junit.Before)

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)113 Test (org.junit.Test)39 HashMap (java.util.HashMap)35 Schema (org.apache.arrow.vector.types.pojo.Schema)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)32 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)28 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)28 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)27 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)23 HashSet (java.util.HashSet)23 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 Field (org.apache.arrow.vector.types.pojo.Field)17 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)13 PreparedStatement (java.sql.PreparedStatement)13 ResultSet (java.sql.ResultSet)13 ArrayList (java.util.ArrayList)13 MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse)12 Connection (java.sql.Connection)12 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)11