Search in sources:

Example 31 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class DynamoDBRecordHandlerTest method testStructWithNullFromGlueTable.

/**
 * Reads a row from TEST_TABLE4 through a mocked Glue table definition that declares a
 * string column and a struct column, then checks the single returned row's schema and
 * rendered contents.
 */
@Test
public void testStructWithNullFromGlueTable() throws Exception {
    // Describe the Glue table: two columns plus the source-table and column-name mapping parameters.
    List<Column> glueColumns = new ArrayList<>();
    glueColumns.add(new Column().withName("col0").withType("string"));
    glueColumns.add(new Column().withName("col1").withType("struct"));
    Map<String, String> tableParameters = ImmutableMap.of(
            SOURCE_TABLE_PROPERTY, TEST_TABLE4,
            COLUMN_NAME_MAPPING_PROPERTY, "col1=Col1,col2=Col2");
    StorageDescriptor storageDescriptor = new StorageDescriptor().withColumns(glueColumns);
    Table glueTable = new Table()
            .withParameters(tableParameters)
            .withPartitionKeys()
            .withStorageDescriptor(storageDescriptor);
    GetTableResult stubbedGlueResult = new GetTableResult().withTable(glueTable);
    when(glueClient.getTable(any())).thenReturn(stubbedGlueResult);

    // Resolve the table schema through the metadata handler, which consults the mocked Glue client.
    GetTableRequest tableRequest = new GetTableRequest(
            TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, new TableName(DEFAULT_SCHEMA, TEST_TABLE4));
    GetTableResponse tableResponse = metadataHandler.doGetTable(allocator, tableRequest);
    logger.info("testStructWithNullFromGlueTable: GetTableResponse[{}]", tableResponse);
    logger.info("testStructWithNullFromGlueTable: GetTableResponse Schema[{}]", tableResponse.getSchema());
    Schema schema4 = tableResponse.getSchema();

    // NOTE(review): only col0 and col1 are declared above, so a field named "Col2" may never
    // be present and these two assertions may never execute - confirm the intended column name.
    for (Field field : schema4.getFields()) {
        if (!field.getName().equals("Col2")) {
            continue;
        }
        assertEquals(2, field.getChildren().size());
        assertTrue(field.getType() instanceof ArrowType.Struct);
    }

    // A single split covering segment 0 of 1.
    Split split = Split.newBuilder(SPILL_LOCATION, keyFactory.create())
            .add(TABLE_METADATA, TEST_TABLE4)
            .add(SEGMENT_ID_PROPERTY, "0")
            .add(SEGMENT_COUNT_METADATA, "1")
            .build();
    // Both size limits are set too big to spill, so the rows are expected back inline.
    ReadRecordsRequest readRequest = new ReadRecordsRequest(
            TEST_IDENTITY,
            TEST_CATALOG_NAME,
            TEST_QUERY_ID,
            TEST_TABLE_4_NAME,
            schema4,
            split,
            new Constraints(ImmutableMap.of()),
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, readRequest);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResponse;
    logger.info("testStructWithNullFromGlueTable: {}", BlockUtils.rowToString(inlineResponse.getRecords(), 0));

    // Exactly one row, carrying the resolved schema and the expected rendering.
    Block records = inlineResponse.getRecords();
    assertEquals(1, records.getRowCount());
    assertEquals(schema4, records.getSchema());
    assertEquals("[Col0 : hashVal], [Col1 : {[field1 : someField1]}]", BlockUtils.rowToString(inlineResponse.getRecords(), 0));
}
Also used : Table(com.amazonaws.services.glue.model.Table) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) Jackson.toJsonString(com.amazonaws.util.json.Jackson.toJsonString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) Field(org.apache.arrow.vector.types.pojo.Field) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Column(com.amazonaws.services.glue.model.Column) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) Test(org.junit.Test)

Example 32 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class DynamoDBRecordHandlerTest method testDateTimeSupportFromGlueTable.

/**
 * Reads a row from TEST_TABLE3 through a mocked Glue table definition that declares
 * timestamp, date and timestamptz columns, and checks the values returned for each.
 *
 * <p>The test pins the JVM default time zone to UTC while it runs and restores the previous
 * default afterwards, so the change does not leak into other tests in the same JVM.
 */
@Test
public void testDateTimeSupportFromGlueTable() throws Exception {
    // TimeZone.setDefault is JVM-wide state: remember the current value so it can be restored.
    TimeZone previousDefaultZone = TimeZone.getDefault();
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
    try {
        List<Column> columns = new ArrayList<>();
        columns.add(new Column().withName("col0").withType("string"));
        columns.add(new Column().withName("col1").withType("timestamp"));
        columns.add(new Column().withName("col2").withType("timestamp"));
        columns.add(new Column().withName("col3").withType("date"));
        columns.add(new Column().withName("col4").withType("date"));
        columns.add(new Column().withName("col5").withType("timestamptz"));
        columns.add(new Column().withName("col6").withType("timestamptz"));
        columns.add(new Column().withName("col7").withType("timestamptz"));
        // col1 and col3 get explicit datetime formats; the other columns have none configured here.
        // NOTE(review): the stray spaces inside both mapping strings presumably exercise
        // whitespace handling in the mapping parser - keep them byte-for-byte.
        Map<String, String> param = ImmutableMap.of(SOURCE_TABLE_PROPERTY, TEST_TABLE3, COLUMN_NAME_MAPPING_PROPERTY, "col1=Col1 , col2=Col2 ,col3=Col3, col4=Col4,col5=Col5,col6=Col6,col7=Col7", DATETIME_FORMAT_MAPPING_PROPERTY, "col1=yyyyMMdd'S'HHmmss,col3=dd/MM/yyyy ");
        Table table = new Table().withParameters(param).withPartitionKeys().withStorageDescriptor(new StorageDescriptor().withColumns(columns));
        GetTableResult mockResult = new GetTableResult().withTable(table);
        when(glueClient.getTable(any())).thenReturn(mockResult);

        // Resolve the schema through the metadata handler, which consults the mocked Glue client.
        TableName tableName = new TableName(DEFAULT_SCHEMA, TEST_TABLE3);
        GetTableRequest getTableRequest = new GetTableRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, tableName);
        GetTableResponse getTableResponse = metadataHandler.doGetTable(allocator, getTableRequest);
        logger.info("testDateTimeSupportFromGlueTable: GetTableResponse[{}]", getTableResponse);
        logger.info("testDateTimeSupportFromGlueTable: GetTableResponse Schema[{}]", getTableResponse.getSchema());
        Schema schema3 = getTableResponse.getSchema();

        Split split = Split.newBuilder(SPILL_LOCATION, keyFactory.create()).add(TABLE_METADATA, TEST_TABLE3).add(SEGMENT_ID_PROPERTY, "0").add(SEGMENT_COUNT_METADATA, "1").build();
        // Size limits are too big to spill, so the rows are expected back inline.
        ReadRecordsRequest request = new ReadRecordsRequest(TEST_IDENTITY, TEST_CATALOG_NAME, TEST_QUERY_ID, TEST_TABLE_3_NAME, schema3, split, new Constraints(ImmutableMap.of()),
                100_000_000_000L, 100_000_000_000L);
        RecordResponse rawResponse = handler.doReadRecords(allocator, request);
        assertTrue(rawResponse instanceof ReadRecordsResponse);
        ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;

        // Plain decimal month literal: the original "02" was an octal literal with the same value.
        LocalDate expectedDate = LocalDate.of(2020, 2, 27);
        LocalDateTime expectedDateTime = LocalDateTime.of(2020, 2, 27, 9, 12, 27);
        assertEquals(1, response.getRecords().getRowCount());
        assertEquals(expectedDateTime, response.getRecords().getFieldReader("Col1").readLocalDateTime());
        assertEquals(expectedDateTime, response.getRecords().getFieldReader("Col2").readLocalDateTime());
        // Date columns are read back as integers and interpreted as days since the epoch.
        assertEquals(expectedDate, LocalDate.ofEpochDay(response.getRecords().getFieldReader("Col3").readInteger()));
        assertEquals(expectedDate, LocalDate.ofEpochDay(response.getRecords().getFieldReader("Col4").readInteger()));
        // Zoned timestamps are read back as longs and compared against the packed form of the expected value.
        assertEquals(getPackedDateTimeWithZone("2015-12-21T17:42:34-05:00"), response.getRecords().getFieldReader("Col5").readLong().longValue());
        assertEquals(getPackedDateTimeWithZone("2015-12-21T17:42:34Z"), response.getRecords().getFieldReader("Col6").readLong().longValue());
        assertEquals(getPackedDateTimeWithZone("2015-12-21T17:42:34Z"), response.getRecords().getFieldReader("Col7").readLong().longValue());
    } finally {
        // Undo the global change even when an assertion above fails.
        TimeZone.setDefault(previousDefaultZone);
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) Table(com.amazonaws.services.glue.model.Table) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) Jackson.toJsonString(com.amazonaws.util.json.Jackson.toJsonString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) LocalDate(java.time.LocalDate) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Column(com.amazonaws.services.glue.model.Column) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Split(com.amazonaws.athena.connector.lambda.domain.Split) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) Test(org.junit.Test)

Example 33 with ReadRecordsResponse

use of com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse in project aws-athena-query-federation by awslabs.

the class RecordHandler method doReadRecords.

/**
 * Reads the row data that belongs to the Split carried by the given request.
 *
 * <p>The rows are produced by {@code readWithConstraint} into a block spiller. If the spiller
 * reports that nothing was spilled, its block is returned inline; otherwise the response
 * carries the spill locations and the encryption key from the spill configuration.
 *
 * @param allocator Tool for creating and managing Apache Arrow Blocks.
 * @param request Details of the read request, including:
 * 1. The Split
 * 2. The Catalog, Database, and Table the read request is for.
 * 3. The filtering predicate (if any)
 * 4. The columns required for projection.
 * @return A RecordResponse which is either a ReadRecordsResponse (rows returned inline) or a
 * RemoteReadRecordsResponse (rows spilled) for the requested Split.
 * @throws Exception propagated from building the spill configuration, from reading the rows,
 * or from closing the resources opened for the read.
 */
public RecordResponse doReadRecords(BlockAllocator allocator, ReadRecordsRequest request) throws Exception {
    logger.info("doReadRecords: {}:{}", request.getSchema(), request.getSplit().getSpillLocation());
    SpillConfig spillConfig = getSpillConfig(request);
    // All three helpers are opened together and closed automatically when the method exits.
    try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, request.getSchema(), request.getConstraints());
         S3BlockSpiller blockSpiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, request.getSchema(), constraintEvaluator);
         QueryStatusChecker statusChecker = new QueryStatusChecker(athena, athenaInvoker, request.getQueryId())) {
        readWithConstraint(blockSpiller, request, statusChecker);

        // Spilled data is handed back by reference rather than inline.
        if (blockSpiller.spilled()) {
            return new RemoteReadRecordsResponse(request.getCatalogName(), request.getSchema(), blockSpiller.getSpillLocations(), spillConfig.getEncryptionKey());
        }
        return new ReadRecordsResponse(request.getCatalogName(), blockSpiller.getBlock());
    }
}
Also used : SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator)

Aggregations

ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)33 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)26 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)26 Test (org.junit.Test)25 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)16 HashMap (java.util.HashMap)16 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)15 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)15 ArrayList (java.util.ArrayList)15 Matchers.anyString (org.mockito.Matchers.anyString)15 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)12 Schema (org.apache.arrow.vector.types.pojo.Schema)11 InvocationOnMock (org.mockito.invocation.InvocationOnMock)11 Block (com.amazonaws.athena.connector.lambda.data.Block)7 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)6 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)6 InputStream (java.io.InputStream)6 GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest)5