Search in sources :

Example 26 with GetTableRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableRequest in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetTable.

/**
 * Calls {@code handler.doGetTable} while the Glue client is stubbed to throw an
 * {@link AmazonServiceException}, then checks the returned table name and that the
 * returned schema has 11 fields.
 *
 * NOTE(review): presumably the schema comes from a non-Glue fallback prepared by the
 * test fixture - confirm against the class setup, which is not visible here.
 */
@Test
public void doGetTable() throws Exception {
    // Every Glue getTable call fails for the duration of this test.
    when(glueClient.getTable(any())).thenThrow(new AmazonServiceException(""));

    GetTableResponse response = handler.doGetTable(
            allocator,
            new GetTableRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_NAME));
    logger.info("doGetTable - {}", response.getSchema());

    // Schema-qualified name of the resolved table.
    assertThat(response.getTableName().getSchemaName(), equalTo(DEFAULT_SCHEMA));
    assertThat(response.getTableName().getTableName(), equalTo(TEST_TABLE));
    // Number of fields expected in the resulting schema.
    assertThat(response.getSchema().getFields().size(), equalTo(11));
}
Also used : GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) AmazonServiceException(com.amazonaws.AmazonServiceException) Test(org.junit.Test)

Example 27 with GetTableRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableRequest in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method testCaseInsensitiveResolve.

/**
 * Requests {@code TEST_TABLE_2_NAME} while the Glue client is stubbed to throw an
 * {@link AmazonServiceException}, and checks that the response reports that same
 * table name.
 *
 * NOTE(review): the method name suggests the fixture table differs from the requested
 * name only by case - confirm against the class setup, which is not visible here.
 */
@Test
public void testCaseInsensitiveResolve() throws Exception {
    // Every Glue getTable call fails for the duration of this test.
    when(glueClient.getTable(any())).thenThrow(new AmazonServiceException(""));

    GetTableResponse response = handler.doGetTable(
            allocator,
            new GetTableRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, TEST_TABLE_2_NAME));
    logger.info("doGetTable - {}", response.getSchema());

    assertThat(response.getTableName(), equalTo(TEST_TABLE_2_NAME));
}
Also used : GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) AmazonServiceException(com.amazonaws.AmazonServiceException) Test(org.junit.Test)

Example 28 with GetTableRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableRequest in project aws-athena-query-federation by awslabs.

the class LambdaMetadataProvider method getTable.

/**
 * Builds a GetTableRequest for the given table and executes it against the specified Lambda function.
 * A fresh query ID is generated for every invocation and is included in the log output.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating which table should be retrieved
 * @param metadataFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the GetTableResponse produced by the call
 * @throws RuntimeException wrapping any exception raised while building, submitting, or closing the request
 */
public static GetTableResponse getTable(String catalog, TableName tableName, String metadataFunction, FederatedIdentity identity) {
    String queryId = generateQueryId();
    // Placeholder form, consistent with the other log statements in this method.
    log.info("Submitting GetTableRequest with ID {}", queryId);
    // The request is declared as a resource so that it is closed once the call completes.
    try (GetTableRequest request = new GetTableRequest(identity, queryId, catalog, tableName)) {
        log.info("Submitting request: {}", request);
        GetTableResponse response = (GetTableResponse) getService(metadataFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    } catch (Exception e) {
        // Callers get a single unchecked exception type; the original failure is kept as the cause.
        throw new RuntimeException(e);
    }
}
Also used : GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)

Example 29 with GetTableRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableRequest in project aws-athena-query-federation by awslabs.

the class GlueMetadataHandler method doGetTable.

/**
 * Retrieves a Table (columns and properties) from AWS Glue for the schema (aka database) and table named in the
 * request, optionally validates it against the supplied filter, and converts it into a GetTableResponse.
 *
 * @param blockAllocator Tool for creating and managing Apache Arrow Blocks (not referenced directly in this method).
 * @param request Provides details on who made the request and which Athena catalog, database, and table they are querying.
 * @param filter The TableFilter the retrieved table must pass; when null, no filtering is applied.
 * @return A GetTableResponse containing the columns, their types, the table properties (as schema metadata), and the
 * set of partition column names for the requested table.
 * @note This method throws a RuntimeException if the retrieved table is rejected by the filter.
 */
protected GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest request, TableFilter filter) throws Exception {
    TableName requestedName = request.getTableName();

    // Translate the Athena request into a Glue GetTable call.
    com.amazonaws.services.glue.model.GetTableRequest glueRequest = new com.amazonaws.services.glue.model.GetTableRequest();
    glueRequest.setCatalogId(getCatalog(request));
    glueRequest.setDatabaseName(requestedName.getSchemaName());
    glueRequest.setName(requestedName.getTableName());

    Table table = awsGlue.getTable(glueRequest).getTable();

    boolean rejectedByFilter = filter != null && !filter.filter(table);
    if (rejectedByFilter) {
        throw new RuntimeException("No matching table found " + request.getTableName());
    }

    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();

    // Table-level parameters are copied into the schema metadata before any column is processed.
    Map<String, String> tableParameters = table.getParameters();
    if (tableParameters != null) {
        for (Map.Entry<String, String> parameter : tableParameters.entrySet()) {
            schemaBuilder.addMetadata(parameter.getKey(), parameter.getValue());
        }
    }

    // A column name mapping can be provided to get around restrictive Glue naming rules
    Map<String, String> columnNameMapping = getColumnNameMapping(table);
    Map<String, String> dateTimeFormatMapping = getDateTimeFormatMapping(table);

    // Partition columns are reported under their mapped names; empty when the table has no partition keys.
    final Set<String> partitionCols;
    if (table.getPartitionKeys() == null) {
        partitionCols = new HashSet<>();
    } else {
        partitionCols = table.getPartitionKeys().stream()
                .map(Column::getName)
                .map(keyName -> columnNameMapping.getOrDefault(keyName, keyName))
                .collect(Collectors.toSet());
    }

    // Datetime formats are declared per raw column name; re-key them by mapped column name.
    Map<String, String> mappedDateTimeFormats = new HashMap<>();
    for (Column column : table.getStorageDescriptor().getColumns()) {
        String rawName = column.getName();
        String mappedName = columnNameMapping.getOrDefault(rawName, rawName);

        // NOTE(review): any type override handling presumably lives in convertField - confirm.
        logger.info("Column {} with registered type {}", rawName, column.getType());
        schemaBuilder.addField(convertField(mappedName, column.getType()));

        // Only non-null, non-blank comments are recorded as metadata (keyed by the mapped column name).
        String comment = column.getComment();
        boolean hasComment = comment != null && !comment.trim().isEmpty();
        if (hasComment) {
            schemaBuilder.addMetadata(mappedName, comment);
        }

        if (dateTimeFormatMapping.containsKey(rawName)) {
            mappedDateTimeFormats.put(mappedName, dateTimeFormatMapping.get(rawName));
        }
    }

    populateDatetimeFormatMappingIfAvailable(schemaBuilder, mappedDateTimeFormats);
    populateSourceTableNameIfAvailable(table, schemaBuilder);

    // The original view text, when present and non-empty, is passed along as metadata.
    String viewText = table.getViewOriginalText();
    if (viewText != null && !viewText.isEmpty()) {
        schemaBuilder.addMetadata(VIEW_METADATA_FIELD, viewText);
    }

    return new GetTableResponse(request.getCatalogName(), request.getTableName(), schemaBuilder.build(), partitionCols);
}
Also used : Table(com.amazonaws.services.glue.model.Table) HashMap(java.util.HashMap) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) Column(com.amazonaws.services.glue.model.Column) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) HashSet(java.util.HashSet)

Example 30 with GetTableRequest

use of com.amazonaws.athena.connector.lambda.metadata.GetTableRequest in project aws-athena-query-federation by awslabs.

the class GlueMetadataHandlerTest method doGetTableEmptyComment.

/**
 * Runs doGetTable against a mocked Glue table whose single column ("col1", type "int") has a
 * blank comment and whose table parameters contain a key equal to that column name. Asserts
 * that the column appears in the resulting schema with minor type INT.
 */
@Test
public void doGetTableEmptyComment() throws Exception {
    // Table parameters; the "col1" entry deliberately conflicts with the column name.
    Map<String, String> tableParams = new HashMap<>();
    tableParams.put(SOURCE_TABLE_PROPERTY, "My-Table");
    tableParams.put("col1", "col1");

    // One column whose comment is a single space.
    List<Column> glueColumns = new ArrayList<>();
    glueColumns.add(new Column().withName("col1").withType("int").withComment(" "));

    StorageDescriptor mockSd = mock(StorageDescriptor.class);
    when(mockSd.getColumns()).thenReturn(glueColumns);

    Table mockTable = mock(Table.class);
    when(mockTable.getName()).thenReturn(table);
    when(mockTable.getStorageDescriptor()).thenReturn(mockSd);
    when(mockTable.getParameters()).thenReturn(tableParams);

    // The Glue stub validates the outgoing request before handing back the mocked table.
    when(mockGlue.getTable(any(com.amazonaws.services.glue.model.GetTableRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        com.amazonaws.services.glue.model.GetTableRequest glueRequest =
                (com.amazonaws.services.glue.model.GetTableRequest) invocation.getArguments()[0];
        assertEquals(accountId, glueRequest.getCatalogId());
        assertEquals(schema, glueRequest.getDatabaseName());
        assertEquals(table, glueRequest.getName());

        GetTableResult mockResult = mock(GetTableResult.class);
        when(mockResult.getTable()).thenReturn(mockTable);
        return mockResult;
    });

    GetTableResponse res = handler.doGetTable(
            allocator,
            new GetTableRequest(IdentityUtil.fakeIdentity(), queryId, catalog, new TableName(schema, table)));
    logger.info("doGetTable - {}", res);

    // The column must be present in the schema under its own name.
    assertNotNull(res.getSchema().findField("col1"));

    // Glue type "int" must surface as the Arrow INT minor type.
    Types.MinorType actualType = Types.getMinorTypeForArrowType(res.getSchema().findField("col1").getType());
    assertTrue(actualType.equals(Types.MinorType.INT));
}
Also used : Table(com.amazonaws.services.glue.model.Table) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) GlueMetadataHandler.getSourceTableName(com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler.getSourceTableName) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Column(com.amazonaws.services.glue.model.Column) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) Test(org.junit.Test)

Aggregations

GetTableRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableRequest)51 Test (org.junit.Test)48 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)33 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)30 ArrayList (java.util.ArrayList)13 GetTableResult (com.amazonaws.services.glue.model.GetTableResult)11 Column (com.amazonaws.services.glue.model.Column)10 Schema (org.apache.arrow.vector.types.pojo.Schema)10 StorageDescriptor (com.amazonaws.services.glue.model.StorageDescriptor)9 Table (com.amazonaws.services.glue.model.Table)8 InvocationOnMock (org.mockito.invocation.InvocationOnMock)8 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)7 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)7 HashMap (java.util.HashMap)7 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)6 ResultSet (java.sql.ResultSet)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)6 Field (org.apache.arrow.vector.types.pojo.Field)6 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)5 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)4